From 13d78ef202885d1f1e6d375503e86c49b9b33d3f Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Fri, 7 Nov 2025 11:06:53 -0800 Subject: [PATCH 1/5] [RISCV] Adopt SpacemitX60's scheduling model for `-mtune=generic` --- llvm/docs/RISCVUsage.rst | 13 +++++++++++++ llvm/docs/ReleaseNotes.md | 1 + llvm/lib/Target/RISCV/RISCVProcessors.td | 16 +++++++++++----- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst index d03f383a92b3b..64b486d71c1ae 100644 --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -601,3 +601,16 @@ Clang's ``-msmall-data-limit=`` option controls what the threshold size is (in b The small data limit threshold is also used to separate small constants into sections with names starting with ``.srodata``. LLD does not place these with the ``.sdata`` and ``.sbss`` sections as ``.srodata`` sections are read only and the other two are writable. Instead the ``.srodata`` sections are placed adjacent to ``.rodata``. Data suggests that these options can produce significant improvements across a range of benchmarks. + +Scheduling Model and Tuning +=========================== + +RISC-V is highly configurable, meaning its scheduling models could be highly diversified as well. Yet we still believe it is helpful to provide a "generic" tuning processor / scheduling model that represents the "lowest common denominator" RISC-V implementation at the time. The idea is that it could serve as a "good-enough" baseline model for performance tuning purposes on some of the most common use cases. + +Though details of this generic scheduling model might evolve over time, we always have some _expectations_ on the kind of processors it is used for. + +For example, the ``generic`` tuning processor is expected to target in-order application processors designed for general-purpose computing. It is usually (but not required to be) RVA22U64- or RVA23U64-capable. 
The ``generic-ooo`` has a similar set of expectations, except it is targeting out-of-order application processors. + +Right now, we simply assign a scheduling model that is widely used by the community to ``generic``. But in the future, we can create a standalone scheduling model for ``generic``, or even create a generic model for each of the individual sectors. For example, a ``generic-embedded`` for embedded processors and a ``generic-server`` for server workloads. + +These future generic models could even serve as the "base" model for other scheduling models to derive from: it's not uncommon for multiple processors to share a similar set of instruction scheduling info except a few key instructions, and this is especially true for RISC-V given its highly configurable nature. If we could design the base model in a way that it can be _parameterized_ by subtarget tuning features, we can substitute the traditional way of creating individual scheduling models with a combination of base scheduling model + different subtarget features. diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 23bba99ec874f..d330120214705 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -138,6 +138,7 @@ Changes to the RISC-V Backend * Adds experimental support for the 'Zibi` (Branch with Immediate) extension. * Add support for Zvfofp8min (OFP8 conversion extension) * Adds assembler support for the Andes `XAndesvsinth` (Andes Vector Small Int Handling Extension). +* `-mtune=generic` now uses the scheduling model from SpacemitX60 instead of an empty scheduling model. 
Changes to the WebAssembly Backend ---------------------------------- diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index e86431f78f1ba..136e89687fa55 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -88,6 +88,17 @@ class RISCVTuneProcessorModel, + GenericTuneInfo; +// `generic-ooo` is expected to target out-of-order application processors designed +// for general-purpose computing. +def GENERIC_OOO : RISCVTuneProcessorModel<"generic-ooo", GenericOOOModel>, + GenericTuneInfo; + def GENERIC_RV32 : RISCVProcessorModel<"generic-rv32", NoSchedModel, [Feature32Bit, @@ -100,11 +111,6 @@ def GENERIC_RV64 : RISCVProcessorModel<"generic-rv64", FeatureStdExtI], GenericTuneFeatures>, GenericTuneInfo; -// Support generic for compatibility with other targets. The triple will be used -// to change to the appropriate rv32/rv64 version. -def GENERIC : RISCVTuneProcessorModel<"generic", NoSchedModel>, GenericTuneInfo; -def GENERIC_OOO : RISCVTuneProcessorModel<"generic-ooo", GenericOOOModel>, - GenericTuneInfo; def MIPS_P8700 : RISCVProcessorModel<"mips-p8700", MIPSP8700Model, From f004f865520037270ad6c1d4385f4cf0ed4f1494 Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Fri, 7 Nov 2025 11:28:54 -0800 Subject: [PATCH 2/5] fixup! Add the description on superscalar and Linux --- llvm/docs/RISCVUsage.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst index 64b486d71c1ae..e213db332cf38 100644 --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -609,7 +609,7 @@ RISC-V is highly configurable, meaning its scheduling models could be highly div Though details of this generic scheduling model might evolve over time, we always have some _expectations_ on the kind of processors it is used for. 
-For example, the ``generic`` tuning processor is expected to target in-order application processors designed for general-purpose computing. It is usually (but not required to be) RVA22U64- or RVA23U64-capable. The ``generic-ooo`` has a similar set of expectations, except it is targeting out-of-order application processors. +For example, the ``generic`` tuning processor is expected to target in-order, superscalar application processors designed for general-purpose computing. It is usually RVA22U64- or RVA23U64-capable and intended to run Linux. The ``generic-ooo`` has a similar set of expectations, except it is targeting out-of-order application processors. Right now, we simply assign a scheduling model that is widely used by the community to ``generic``. But in the future, we can create a standalone scheduling model for ``generic``, or even create a generic model for each of the individual sectors. For example, a ``generic-embedded`` for embedded processors and a ``generic-server`` for server workloads. From 0fc7c742a85976cbb7e44a9e13c165e9fadb027c Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Fri, 7 Nov 2025 11:50:44 -0800 Subject: [PATCH 3/5] fixup! Formatting --- llvm/lib/Target/RISCV/RISCVProcessors.td | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index 136e89687fa55..9e31d08ae2243 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -88,17 +88,6 @@ class RISCVTuneProcessorModel, - GenericTuneInfo; -// `generic-ooo` is expected to target out-of-order application processors designed -// for general-purpose computing. 
-def GENERIC_OOO : RISCVTuneProcessorModel<"generic-ooo", GenericOOOModel>, - GenericTuneInfo; - def GENERIC_RV32 : RISCVProcessorModel<"generic-rv32", NoSchedModel, [Feature32Bit, @@ -111,6 +100,16 @@ def GENERIC_RV64 : RISCVProcessorModel<"generic-rv64", FeatureStdExtI], GenericTuneFeatures>, GenericTuneInfo; +// Support generic for compatibility with other targets. The triple will be used +// to change to the appropriate rv32/rv64 version. +// `generic` is expected to target in-order application processors designed for +// general-purpose computing. +def GENERIC : RISCVTuneProcessorModel<"generic", SpacemitX60Model>, + GenericTuneInfo; +// `generic-ooo` is expected to target out-of-order application processors designed +// for general-purpose computing. +def GENERIC_OOO : RISCVTuneProcessorModel<"generic-ooo", GenericOOOModel>, + GenericTuneInfo; def MIPS_P8700 : RISCVProcessorModel<"mips-p8700", MIPSP8700Model, From fcd1e2f357f27463945f97a32c360d94f8a23e5b Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Fri, 7 Nov 2025 15:42:06 -0800 Subject: [PATCH 4/5] fixup! Set `generic-rv32` and `generic-rv64` to SpacemitX60 as well And update the tests. 
--- llvm/lib/Target/RISCV/RISCVProcessors.td | 8 +- llvm/test/CodeGen/RISCV/GlobalISel/add-imm.ll | 4 +- .../CodeGen/RISCV/GlobalISel/alu-roundtrip.ll | 21 +- .../RISCV/GlobalISel/atomic-cmpxchg.ll | 2048 +- .../RISCV/GlobalISel/atomicrmw-add-sub.ll | 157 +- .../test/CodeGen/RISCV/GlobalISel/bitmanip.ll | 120 +- .../RISCV/GlobalISel/combine-neg-abs.ll | 176 +- .../RISCV/GlobalISel/constbarrier-rv32.ll | 33 +- .../RISCV/GlobalISel/div-by-constant.ll | 214 +- .../CodeGen/RISCV/GlobalISel/double-arith.ll | 8 +- .../RISCV/GlobalISel/double-intrinsics.ll | 10 +- .../CodeGen/RISCV/GlobalISel/float-arith.ll | 6 +- .../RISCV/GlobalISel/float-intrinsics.ll | 96 +- llvm/test/CodeGen/RISCV/GlobalISel/iabs.ll | 4 +- .../CodeGen/RISCV/GlobalISel/rotl-rotr.ll | 1204 +- .../CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll | 42 +- llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll | 150 +- .../test/CodeGen/RISCV/GlobalISel/rv32zbkb.ll | 8 +- llvm/test/CodeGen/RISCV/GlobalISel/rv64zba.ll | 10 +- llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll | 490 +- .../test/CodeGen/RISCV/GlobalISel/rv64zbkb.ll | 18 +- llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll | 8 +- llvm/test/CodeGen/RISCV/GlobalISel/shift.ll | 6 +- llvm/test/CodeGen/RISCV/GlobalISel/shifts.ll | 286 +- llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll | 8 +- llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll | 302 +- ...lar-shift-by-byte-multiple-legalization.ll | 16721 ++++++++-------- llvm/test/CodeGen/RISCV/abds-neg.ll | 602 +- llvm/test/CodeGen/RISCV/abds.ll | 1108 +- llvm/test/CodeGen/RISCV/abdu-neg.ll | 620 +- llvm/test/CodeGen/RISCV/abdu.ll | 1024 +- llvm/test/CodeGen/RISCV/add-before-shl.ll | 50 +- .../CodeGen/RISCV/add_sext_shl_constant.ll | 138 +- llvm/test/CodeGen/RISCV/add_shl_constant.ll | 84 +- .../test/CodeGen/RISCV/addc-adde-sube-subc.ll | 2 +- llvm/test/CodeGen/RISCV/addcarry.ll | 32 +- llvm/test/CodeGen/RISCV/addimm-mulimm.ll | 228 +- llvm/test/CodeGen/RISCV/alu64.ll | 32 +- llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll | 2 +- 
.../test/CodeGen/RISCV/arith-with-overflow.ll | 10 +- .../RISCV/atomic-cmpxchg-branch-on-result.ll | 130 +- llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll | 3072 +-- llvm/test/CodeGen/RISCV/atomic-rmw.ll | 15388 +++++++------- llvm/test/CodeGen/RISCV/atomic-signext.ll | 1628 +- .../CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll | 184 +- .../CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll | 278 +- llvm/test/CodeGen/RISCV/avgceils.ll | 72 +- llvm/test/CodeGen/RISCV/avgceilu.ll | 72 +- llvm/test/CodeGen/RISCV/avgfloors.ll | 68 +- llvm/test/CodeGen/RISCV/avgflooru.ll | 36 +- llvm/test/CodeGen/RISCV/bf16-promote.ll | 20 +- llvm/test/CodeGen/RISCV/bfloat-arith.ll | 188 +- llvm/test/CodeGen/RISCV/bfloat-convert.ll | 392 +- llvm/test/CodeGen/RISCV/bfloat-imm.ll | 2 +- llvm/test/CodeGen/RISCV/bfloat-mem.ll | 4 +- llvm/test/CodeGen/RISCV/bfloat-select-fcmp.ll | 8 +- llvm/test/CodeGen/RISCV/bfloat.ll | 74 +- llvm/test/CodeGen/RISCV/bittest.ll | 16 +- llvm/test/CodeGen/RISCV/bswap-bitreverse.ll | 1518 +- .../test/CodeGen/RISCV/callee-saved-fpr32s.ll | 384 +- .../test/CodeGen/RISCV/callee-saved-fpr64s.ll | 240 +- llvm/test/CodeGen/RISCV/callee-saved-gprs.ll | 452 +- llvm/test/CodeGen/RISCV/calling-conv-half.ll | 518 +- .../RISCV/calling-conv-ilp32-ilp32f-common.ll | 100 +- ...calling-conv-ilp32-ilp32f-ilp32d-common.ll | 252 +- .../test/CodeGen/RISCV/calling-conv-ilp32d.ll | 74 +- .../test/CodeGen/RISCV/calling-conv-ilp32e.ll | 600 +- .../calling-conv-ilp32f-ilp32d-common.ll | 46 +- .../calling-conv-lp64-lp64f-lp64d-common.ll | 132 +- .../RISCV/calling-conv-vector-float.ll | 44 +- llvm/test/CodeGen/RISCV/calls.ll | 12 +- llvm/test/CodeGen/RISCV/cmov-branch-opt.ll | 2 +- .../CodeGen/RISCV/combine-storetomstore.ll | 28 +- llvm/test/CodeGen/RISCV/compress.ll | 64 +- llvm/test/CodeGen/RISCV/condbinops.ll | 92 +- llvm/test/CodeGen/RISCV/condops.ll | 48 +- .../CodeGen/RISCV/constpool-known-bits.ll | 16 +- llvm/test/CodeGen/RISCV/copyprop.ll | 16 +- llvm/test/CodeGen/RISCV/copysign-casts.ll | 104 +- 
llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll | 1418 +- .../CodeGen/RISCV/ctz_zero_return_test.ll | 530 +- llvm/test/CodeGen/RISCV/div-by-constant.ll | 102 +- llvm/test/CodeGen/RISCV/div-pow2.ll | 88 +- llvm/test/CodeGen/RISCV/div.ll | 14 +- llvm/test/CodeGen/RISCV/div_minsize.ll | 186 +- llvm/test/CodeGen/RISCV/double-arith.ll | 2 +- .../RISCV/double-bitmanip-dagcombines.ll | 6 +- .../test/CodeGen/RISCV/double-calling-conv.ll | 60 +- llvm/test/CodeGen/RISCV/double-convert.ll | 388 +- llvm/test/CodeGen/RISCV/double-fcmp-strict.ll | 148 +- llvm/test/CodeGen/RISCV/double-intrinsics.ll | 84 +- llvm/test/CodeGen/RISCV/double-mem.ll | 12 +- .../CodeGen/RISCV/double-round-conv-sat.ll | 756 +- llvm/test/CodeGen/RISCV/double-round-conv.ll | 70 +- llvm/test/CodeGen/RISCV/double-select-fcmp.ll | 4 +- .../RISCV/double-stack-spill-restore.ll | 4 +- llvm/test/CodeGen/RISCV/double_reduct.ll | 8 +- .../early-clobber-tied-def-subreg-liveness.ll | 14 +- llvm/test/CodeGen/RISCV/fastcc-bf16.ll | 6 +- llvm/test/CodeGen/RISCV/fastcc-float.ll | 6 +- llvm/test/CodeGen/RISCV/fastcc-half.ll | 6 +- .../CodeGen/RISCV/fastcc-without-f-reg.ll | 24 +- .../RISCV/float-bit-preserving-dagcombines.ll | 16 +- .../RISCV/float-bitmanip-dagcombines.ll | 4 +- llvm/test/CodeGen/RISCV/float-convert.ll | 345 +- llvm/test/CodeGen/RISCV/float-fcmp-strict.ll | 100 +- llvm/test/CodeGen/RISCV/float-intrinsics.ll | 134 +- .../CodeGen/RISCV/float-round-conv-sat.ll | 672 +- llvm/test/CodeGen/RISCV/float-round-conv.ll | 120 +- .../RISCV/fold-addi-loadstore-zilsd.ll | 2 +- .../test/CodeGen/RISCV/fold-addi-loadstore.ll | 56 +- .../CodeGen/RISCV/fold-binop-into-select.ll | 2 +- llvm/test/CodeGen/RISCV/fold-masked-merge.ll | 68 +- llvm/test/CodeGen/RISCV/fold-mem-offset.ll | 88 +- llvm/test/CodeGen/RISCV/forced-atomics.ll | 52 +- llvm/test/CodeGen/RISCV/fp-fcanonicalize.ll | 148 +- llvm/test/CodeGen/RISCV/fp128.ll | 60 +- llvm/test/CodeGen/RISCV/fpclamptosat.ll | 390 +- llvm/test/CodeGen/RISCV/fpenv.ll | 2 +- 
llvm/test/CodeGen/RISCV/frm-write-in-loop.ll | 7 +- .../CodeGen/RISCV/get-setcc-result-type.ll | 24 +- llvm/test/CodeGen/RISCV/ghccc-rv32.ll | 76 +- llvm/test/CodeGen/RISCV/ghccc-rv64.ll | 76 +- .../test/CodeGen/RISCV/ghccc-without-f-reg.ll | 32 +- .../global-merge-minsize-smalldata-nonzero.ll | 8 +- .../global-merge-minsize-smalldata-zero.ll | 8 +- .../CodeGen/RISCV/global-merge-minsize.ll | 8 +- llvm/test/CodeGen/RISCV/global-merge.ll | 4 +- llvm/test/CodeGen/RISCV/half-arith-strict.ll | 124 +- llvm/test/CodeGen/RISCV/half-arith.ll | 278 +- .../RISCV/half-bitmanip-dagcombines.ll | 2 +- .../test/CodeGen/RISCV/half-convert-strict.ll | 36 +- llvm/test/CodeGen/RISCV/half-convert.ll | 1614 +- llvm/test/CodeGen/RISCV/half-fcmp-strict.ll | 200 +- llvm/test/CodeGen/RISCV/half-fcmp.ll | 152 +- llvm/test/CodeGen/RISCV/half-imm.ll | 2 +- llvm/test/CodeGen/RISCV/half-intrinsics.ll | 132 +- llvm/test/CodeGen/RISCV/half-mem.ll | 16 +- .../test/CodeGen/RISCV/half-round-conv-sat.ll | 1980 +- llvm/test/CodeGen/RISCV/half-round-conv.ll | 1050 +- llvm/test/CodeGen/RISCV/half-select-fcmp.ll | 22 +- .../CodeGen/RISCV/hoist-global-addr-base.ll | 10 +- llvm/test/CodeGen/RISCV/i64-icmp.ll | 2 +- llvm/test/CodeGen/RISCV/iabs.ll | 80 +- .../test/CodeGen/RISCV/icmp-non-byte-sized.ll | 28 +- llvm/test/CodeGen/RISCV/idiv_large.ll | 3354 ++-- llvm/test/CodeGen/RISCV/imm.ll | 88 +- .../RISCV/inline-asm-d-constraint-f.ll | 14 +- .../CodeGen/RISCV/inline-asm-d-modifier-N.ll | 14 +- .../RISCV/inline-asm-f-constraint-f.ll | 28 +- .../CodeGen/RISCV/inline-asm-f-modifier-N.ll | 28 +- .../CodeGen/RISCV/interrupt-attr-nocall.ll | 52 +- .../RISCV/intrinsic-cttz-elts-vscale.ll | 50 +- .../test/CodeGen/RISCV/intrinsic-cttz-elts.ll | 16 +- llvm/test/CodeGen/RISCV/jumptable.ll | 8 +- .../RISCV/lack-of-signed-truncation-check.ll | 76 +- llvm/test/CodeGen/RISCV/legalize-fneg.ll | 10 +- llvm/test/CodeGen/RISCV/llvm.exp10.ll | 24 +- llvm/test/CodeGen/RISCV/llvm.frexp.ll | 68 +- 
...op-strength-reduce-add-cheaper-than-mul.ll | 2 +- .../RISCV/loop-strength-reduce-loop-invar.ll | 44 +- llvm/test/CodeGen/RISCV/lsr-legaladdimm.ll | 2 +- llvm/test/CodeGen/RISCV/machine-cse.ll | 10 +- .../RISCV/machinelicm-constant-phys-reg.ll | 4 +- .../test/CodeGen/RISCV/mask-variable-shift.ll | 76 +- llvm/test/CodeGen/RISCV/memcmp-optsize.ll | 316 +- llvm/test/CodeGen/RISCV/memcmp.ll | 362 +- llvm/test/CodeGen/RISCV/memmove.ll | 64 +- llvm/test/CodeGen/RISCV/memset-inline.ll | 40 +- llvm/test/CodeGen/RISCV/memset-pattern.ll | 16 +- llvm/test/CodeGen/RISCV/min-max.ll | 17 +- llvm/test/CodeGen/RISCV/mul-expand.ll | 504 +- llvm/test/CodeGen/RISCV/mul.ll | 517 +- llvm/test/CodeGen/RISCV/narrow-shl-cst.ll | 8 +- llvm/test/CodeGen/RISCV/neg-abs.ll | 56 +- llvm/test/CodeGen/RISCV/nontemporal.ll | 1040 +- llvm/test/CodeGen/RISCV/or-is-add.ll | 8 +- llvm/test/CodeGen/RISCV/orc-b-patterns.ll | 4 +- .../test/CodeGen/RISCV/overflow-intrinsics.ll | 112 +- llvm/test/CodeGen/RISCV/pr135206.ll | 16 +- llvm/test/CodeGen/RISCV/pr142004.ll | 8 +- llvm/test/CodeGen/RISCV/pr145360.ll | 41 +- llvm/test/CodeGen/RISCV/pr148084.ll | 4 +- llvm/test/CodeGen/RISCV/pr51206.ll | 26 +- llvm/test/CodeGen/RISCV/pr56457.ll | 50 +- llvm/test/CodeGen/RISCV/pr58511.ll | 20 +- llvm/test/CodeGen/RISCV/pr63816.ll | 2 +- llvm/test/CodeGen/RISCV/pr65025.ll | 26 +- llvm/test/CodeGen/RISCV/pr69586.ll | 2724 ++- llvm/test/CodeGen/RISCV/pr84653_pr85190.ll | 68 +- llvm/test/CodeGen/RISCV/pr95271.ll | 42 +- llvm/test/CodeGen/RISCV/pr95284.ll | 4 +- llvm/test/CodeGen/RISCV/push-pop-popret.ll | 690 +- .../CodeGen/RISCV/qci-interrupt-attr-fpr.ll | 464 +- llvm/test/CodeGen/RISCV/qci-interrupt-attr.ll | 404 +- .../CodeGen/RISCV/reassoc-shl-addi-add.ll | 10 +- .../redundant-copy-from-tail-duplicate.ll | 5 +- ...regalloc-last-chance-recoloring-failure.ll | 20 +- .../CodeGen/RISCV/riscv-codegenprepare-asm.ll | 10 +- llvm/test/CodeGen/RISCV/rotl-rotr.ll | 1318 +- llvm/test/CodeGen/RISCV/rv32p.ll | 92 +- 
llvm/test/CodeGen/RISCV/rv32xtheadbb.ll | 82 +- llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll | 36 +- llvm/test/CodeGen/RISCV/rv32zbb.ll | 540 +- llvm/test/CodeGen/RISCV/rv32zbkb.ll | 10 +- llvm/test/CodeGen/RISCV/rv32zbs.ll | 62 +- .../test/CodeGen/RISCV/rv64-double-convert.ll | 81 +- llvm/test/CodeGen/RISCV/rv64-float-convert.ll | 61 +- llvm/test/CodeGen/RISCV/rv64-half-convert.ll | 40 +- .../test/CodeGen/RISCV/rv64i-complex-float.ll | 2 +- llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll | 20 +- .../RISCV/rv64i-w-insts-legalization.ll | 34 +- llvm/test/CodeGen/RISCV/rv64p.ll | 44 +- llvm/test/CodeGen/RISCV/rv64xtheadba.ll | 4 +- llvm/test/CodeGen/RISCV/rv64xtheadbb.ll | 426 +- llvm/test/CodeGen/RISCV/rv64zba.ll | 30 +- llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll | 58 +- llvm/test/CodeGen/RISCV/rv64zbb.ll | 624 +- llvm/test/CodeGen/RISCV/rv64zbkb.ll | 10 +- .../CodeGen/RISCV/rv64zfhmin-half-convert.ll | 8 +- .../RISCV/rvv/65704-illegal-instruction.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/abd.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/abs-vp.ll | 12 +- .../RISCV/rvv/access-fixed-objects-by-rvv.ll | 20 +- .../CodeGen/RISCV/rvv/active_lane_mask.ll | 246 +- .../rvv/alloca-load-store-scalable-array.ll | 20 +- .../rvv/alloca-load-store-scalable-struct.ll | 8 +- .../rvv/alloca-load-store-vector-tuple.ll | 32 +- .../CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll | 84 +- .../CodeGen/RISCV/rvv/bitreverse-sdnode.ll | 1194 +- llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll | 2774 +-- llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll | 608 +- llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll | 1442 +- .../CodeGen/RISCV/rvv/calling-conv-fastcc.ll | 328 +- llvm/test/CodeGen/RISCV/rvv/calling-conv.ll | 20 +- llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll | 366 +- .../RISCV/rvv/combine-ctpop-to-vcpop.ll | 134 +- .../RISCV/rvv/combine-reduce-add-to-vcpop.ll | 306 +- .../RISCV/rvv/combine-store-extract-crash.ll | 14 +- llvm/test/CodeGen/RISCV/rvv/compressstore.ll | 147 +- .../RISCV/rvv/concat-vector-insert-elt.ll | 37 +- 
.../RISCV/rvv/constant-folding-crash.ll | 17 +- llvm/test/CodeGen/RISCV/rvv/copyprop.mir | 4 +- llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll | 1176 +- llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll | 312 +- llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll | 396 +- llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll | 1340 +- llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll | 1290 +- llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll | 1752 +- .../RISCV/rvv/dont-sink-splat-operands.ll | 36 +- .../CodeGen/RISCV/rvv/double-round-conv.ll | 96 +- llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll | 30 +- llvm/test/CodeGen/RISCV/rvv/expandload.ll | 689 +- .../CodeGen/RISCV/rvv/extload-truncstore.ll | 2 +- .../CodeGen/RISCV/rvv/extract-subvector.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll | 62 +- llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll | 40 +- .../CodeGen/RISCV/rvv/extractelt-int-rv32.ll | 44 +- .../CodeGen/RISCV/rvv/extractelt-int-rv64.ll | 42 +- .../RISCV/rvv/fceil-constrained-sdnode.ll | 96 +- llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll | 124 +- .../RISCV/rvv/ffloor-constrained-sdnode.ll | 96 +- llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll | 124 +- .../rvv/fixed-vector-i8-index-cornercase.ll | 126 +- .../CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll | 14 +- .../RISCV/rvv/fixed-vectors-binop-splats.ll | 50 +- .../rvv/fixed-vectors-bitcast-large-vector.ll | 10 +- .../RISCV/rvv/fixed-vectors-bitreverse-vp.ll | 2583 +-- .../RISCV/rvv/fixed-vectors-bitreverse.ll | 420 +- .../RISCV/rvv/fixed-vectors-bswap-vp.ll | 1362 +- .../CodeGen/RISCV/rvv/fixed-vectors-bswap.ll | 184 +- .../rvv/fixed-vectors-buildvec-of-binop.ll | 181 +- .../rvv/fixed-vectors-calling-conv-fastcc.ll | 64 +- .../RISCV/rvv/fixed-vectors-calling-conv.ll | 50 +- .../RISCV/rvv/fixed-vectors-ceil-vp.ll | 360 +- .../RISCV/rvv/fixed-vectors-ctlz-vp.ll | 3228 +-- .../CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll | 646 +- .../RISCV/rvv/fixed-vectors-ctpop-vp.ll | 1115 +- .../CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll | 186 +- 
.../RISCV/rvv/fixed-vectors-cttz-vp.ll | 2752 +-- .../CodeGen/RISCV/rvv/fixed-vectors-cttz.ll | 630 +- .../rvv/fixed-vectors-deinterleave-load.ll | 15 +- .../CodeGen/RISCV/rvv/fixed-vectors-elen.ll | 60 +- .../rvv/fixed-vectors-extload-truncstore.ll | 2 +- .../RISCV/rvv/fixed-vectors-extract-i1.ll | 96 +- .../rvv/fixed-vectors-extract-subvector.ll | 69 +- .../RISCV/rvv/fixed-vectors-extract.ll | 160 +- .../fixed-vectors-fceil-constrained-sdnode.ll | 100 +- ...fixed-vectors-ffloor-constrained-sdnode.ll | 100 +- .../RISCV/rvv/fixed-vectors-floor-vp.ll | 360 +- .../RISCV/rvv/fixed-vectors-fmaximum-vp.ll | 162 +- .../RISCV/rvv/fixed-vectors-fmaximum.ll | 75 +- .../RISCV/rvv/fixed-vectors-fminimum-vp.ll | 162 +- .../RISCV/rvv/fixed-vectors-fminimum.ll | 75 +- ...d-vectors-fnearbyint-constrained-sdnode.ll | 80 +- .../rvv/fixed-vectors-fp-buildvec-bf16.ll | 36 +- .../RISCV/rvv/fixed-vectors-fp-buildvec.ll | 686 +- .../RISCV/rvv/fixed-vectors-fp-setcc.ll | 4672 ++--- .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 628 +- .../RISCV/rvv/fixed-vectors-fp2i-sat.ll | 52 +- .../CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll | 64 +- .../RISCV/rvv/fixed-vectors-fpext-vp.ll | 6 +- .../CodeGen/RISCV/rvv/fixed-vectors-fpowi.ll | 28 +- .../RISCV/rvv/fixed-vectors-fptosi-vp.ll | 16 +- .../RISCV/rvv/fixed-vectors-fptoui-vp.ll | 16 +- .../RISCV/rvv/fixed-vectors-fptrunc-vp.ll | 22 +- ...fixed-vectors-fround-constrained-sdnode.ll | 100 +- .../CodeGen/RISCV/rvv/fixed-vectors-fround.ll | 102 +- ...d-vectors-froundeven-constrained-sdnode.ll | 100 +- .../RISCV/rvv/fixed-vectors-froundeven.ll | 102 +- .../RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll | 81 +- ...fixed-vectors-ftrunc-constrained-sdnode.ll | 100 +- .../RISCV/rvv/fixed-vectors-insert-i1.ll | 6 +- .../rvv/fixed-vectors-insert-subvector.ll | 98 +- .../CodeGen/RISCV/rvv/fixed-vectors-insert.ll | 91 +- .../RISCV/rvv/fixed-vectors-int-buildvec.ll | 2439 +-- .../rvv/fixed-vectors-int-explodevector.ll | 1404 +- .../RISCV/rvv/fixed-vectors-int-setcc.ll | 
24 +- .../RISCV/rvv/fixed-vectors-int-splat.ll | 16 +- .../CodeGen/RISCV/rvv/fixed-vectors-int.ll | 568 +- .../rvv/fixed-vectors-interleave-store.ll | 5 +- ...fixed-vectors-interleaved-access-zve32x.ll | 69 +- .../rvv/fixed-vectors-interleaved-access.ll | 1212 +- .../CodeGen/RISCV/rvv/fixed-vectors-load.ll | 44 +- .../RISCV/rvv/fixed-vectors-mask-buildvec.ll | 56 +- .../RISCV/rvv/fixed-vectors-masked-gather.ll | 2158 +- .../RISCV/rvv/fixed-vectors-masked-load-fp.ll | 8 +- .../rvv/fixed-vectors-masked-load-int.ll | 19 +- .../RISCV/rvv/fixed-vectors-masked-scatter.ll | 2212 +- .../rvv/fixed-vectors-masked-store-fp.ll | 8 +- .../rvv/fixed-vectors-masked-store-int.ll | 10 +- .../RISCV/rvv/fixed-vectors-nearbyint-vp.ll | 196 +- .../rvv/fixed-vectors-reduction-formation.ll | 74 +- .../rvv/fixed-vectors-reduction-fp-vp.ll | 4 +- .../RISCV/rvv/fixed-vectors-reduction-fp.ll | 674 +- .../rvv/fixed-vectors-reduction-int-vp.ll | 188 +- .../RISCV/rvv/fixed-vectors-reduction-int.ll | 875 +- .../rvv/fixed-vectors-reduction-mask-vp.ll | 16 +- .../RISCV/rvv/fixed-vectors-rint-vp.ll | 180 +- .../RISCV/rvv/fixed-vectors-round-vp.ll | 360 +- .../RISCV/rvv/fixed-vectors-roundeven-vp.ll | 360 +- .../RISCV/rvv/fixed-vectors-roundtozero-vp.ll | 360 +- .../CodeGen/RISCV/rvv/fixed-vectors-sad.ll | 44 +- .../RISCV/rvv/fixed-vectors-scalarized.ll | 108 +- .../RISCV/rvv/fixed-vectors-select-addsub.ll | 13 +- .../RISCV/rvv/fixed-vectors-setcc-fp-vp.ll | 2370 +-- .../RISCV/rvv/fixed-vectors-setcc-int-vp.ll | 78 +- .../fixed-vectors-shuffle-changes-length.ll | 172 +- .../RISCV/rvv/fixed-vectors-shuffle-concat.ll | 24 +- .../rvv/fixed-vectors-shuffle-deinterleave.ll | 74 +- .../fixed-vectors-shuffle-deinterleave2.ll | 514 +- .../rvv/fixed-vectors-shuffle-exact-vlen.ll | 66 +- .../fixed-vectors-shuffle-fp-interleave.ll | 44 +- .../RISCV/rvv/fixed-vectors-shuffle-fp.ll | 57 +- .../fixed-vectors-shuffle-int-interleave.ll | 106 +- .../RISCV/rvv/fixed-vectors-shuffle-int.ll | 242 +- 
.../rvv/fixed-vectors-shuffle-reverse.ll | 232 +- .../RISCV/rvv/fixed-vectors-shuffle-rotate.ll | 524 +- .../RISCV/rvv/fixed-vectors-sitofp-vp.ll | 16 +- .../rvv/fixed-vectors-store-merge-crash.ll | 8 +- .../CodeGen/RISCV/rvv/fixed-vectors-store.ll | 10 +- .../rvv/fixed-vectors-strided-load-combine.ll | 44 +- .../fixed-vectors-strided-load-store-asm.ll | 212 +- .../RISCV/rvv/fixed-vectors-strided-vpload.ll | 30 +- .../rvv/fixed-vectors-strided-vpstore.ll | 20 +- .../RISCV/rvv/fixed-vectors-trunc-sat-clip.ll | 30 +- .../RISCV/rvv/fixed-vectors-trunc-vp.ll | 590 +- .../RISCV/rvv/fixed-vectors-uitofp-vp.ll | 16 +- .../RISCV/rvv/fixed-vectors-unaligned.ll | 140 +- .../RISCV/rvv/fixed-vectors-vadd-vp.ll | 45 +- .../RISCV/rvv/fixed-vectors-vcopysign-vp.ll | 25 +- .../RISCV/rvv/fixed-vectors-vfabs-vp.ll | 16 +- .../fixed-vectors-vfcmp-constrained-sdnode.ll | 408 +- ...fixed-vectors-vfcmps-constrained-sdnode.ll | 116 +- .../RISCV/rvv/fixed-vectors-vfma-vp.ll | 172 +- .../RISCV/rvv/fixed-vectors-vfmax-vp.ll | 25 +- .../RISCV/rvv/fixed-vectors-vfmin-vp.ll | 25 +- .../RISCV/rvv/fixed-vectors-vfmuladd-vp.ll | 100 +- .../RISCV/rvv/fixed-vectors-vfneg-vp.ll | 16 +- .../RISCV/rvv/fixed-vectors-vfsqrt-vp.ll | 16 +- .../fixed-vectors-vfw-web-simplification.ll | 46 +- .../CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll | 2 +- .../RISCV/rvv/fixed-vectors-vfwmaccbf16.ll | 4 +- .../CodeGen/RISCV/rvv/fixed-vectors-vfwmul.ll | 2 +- .../CodeGen/RISCV/rvv/fixed-vectors-vfwsub.ll | 2 +- ...fixed-vectors-vitofp-constrained-sdnode.ll | 6 +- .../RISCV/rvv/fixed-vectors-vmax-vp.ll | 24 +- .../RISCV/rvv/fixed-vectors-vmaxu-vp.ll | 24 +- .../RISCV/rvv/fixed-vectors-vmin-vp.ll | 24 +- .../RISCV/rvv/fixed-vectors-vminu-vp.ll | 24 +- .../RISCV/rvv/fixed-vectors-vpgather.ll | 263 +- .../CodeGen/RISCV/rvv/fixed-vectors-vpload.ll | 42 +- .../RISCV/rvv/fixed-vectors-vpmerge.ll | 18 +- .../RISCV/rvv/fixed-vectors-vpscatter.ll | 118 +- .../RISCV/rvv/fixed-vectors-vpstore.ll | 10 +- 
.../rvv/fixed-vectors-vreductions-mask.ll | 42 +- .../CodeGen/RISCV/rvv/fixed-vectors-vrol.ll | 213 +- .../CodeGen/RISCV/rvv/fixed-vectors-vror.ll | 257 +- .../RISCV/rvv/fixed-vectors-vsadd-vp.ll | 35 +- .../RISCV/rvv/fixed-vectors-vsaddu-vp.ll | 29 +- .../RISCV/rvv/fixed-vectors-vscale-range.ll | 110 +- .../RISCV/rvv/fixed-vectors-vselect-vp.ll | 90 +- .../RISCV/rvv/fixed-vectors-vselect.ll | 444 +- .../RISCV/rvv/fixed-vectors-vssub-vp.ll | 41 +- .../RISCV/rvv/fixed-vectors-vssubu-vp.ll | 35 +- .../fixed-vectors-vw-web-simplification.ll | 54 +- .../RISCV/rvv/fixed-vectors-vwadd-mask.ll | 14 +- .../CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll | 132 +- .../CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll | 108 +- .../CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll | 152 +- .../RISCV/rvv/fixed-vectors-vwmulsu.ll | 152 +- .../CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll | 138 +- .../CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll | 45 +- .../RISCV/rvv/fixed-vectors-vwsub-mask.ll | 12 +- .../CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll | 126 +- .../CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll | 120 +- .../RISCV/rvv/fixed-vectors-zvqdotq.ll | 681 +- .../CodeGen/RISCV/rvv/float-round-conv.ll | 80 +- llvm/test/CodeGen/RISCV/rvv/floor-vp.ll | 446 +- .../test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll | 279 +- llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll | 601 +- .../CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll | 90 +- .../test/CodeGen/RISCV/rvv/fminimum-sdnode.ll | 279 +- llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll | 601 +- .../CodeGen/RISCV/rvv/fminimumnum-sdnode.ll | 90 +- .../rvv/fnearbyint-constrained-sdnode.ll | 96 +- .../CodeGen/RISCV/rvv/fnearbyint-sdnode.ll | 128 +- .../CodeGen/RISCV/rvv/fold-binary-reduce.ll | 11 +- .../RISCV/rvv/fold-scalar-load-crash.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/fp4-bitcast.ll | 14 +- .../CodeGen/RISCV/rvv/fpclamptosat_vec.ll | 542 +- llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll | 22 +- llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll | 22 +- llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll | 124 
+- llvm/test/CodeGen/RISCV/rvv/frm-insert.ll | 20 +- .../RISCV/rvv/fround-constrained-sdnode.ll | 96 +- llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll | 124 +- .../rvv/froundeven-constrained-sdnode.ll | 96 +- .../CodeGen/RISCV/rvv/froundeven-sdnode.ll | 124 +- llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll | 583 +- llvm/test/CodeGen/RISCV/rvv/fshr-fshl.ll | 20 +- .../RISCV/rvv/ftrunc-constrained-sdnode.ll | 96 +- llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll | 124 +- .../CodeGen/RISCV/rvv/get_vector_length.ll | 8 +- .../test/CodeGen/RISCV/rvv/half-round-conv.ll | 48 +- .../incorrect-extract-subvector-combine.ll | 33 +- llvm/test/CodeGen/RISCV/rvv/inline-asm.ll | 2 +- .../CodeGen/RISCV/rvv/insert-subvector.ll | 17 +- llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll | 48 +- llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll | 8 +- .../CodeGen/RISCV/rvv/insertelt-int-rv64.ll | 18 +- .../CodeGen/RISCV/rvv/interleave-crash.ll | 8 +- .../RISCV/rvv/intrinsic-vector-match.ll | 1395 +- .../CodeGen/RISCV/rvv/known-never-zero.ll | 30 +- llvm/test/CodeGen/RISCV/rvv/llrint-sdnode.ll | 18 +- llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/llround-sdnode.ll | 18 +- llvm/test/CodeGen/RISCV/rvv/lrint-sdnode.ll | 64 +- llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/lround-sdnode.ll | 64 +- llvm/test/CodeGen/RISCV/rvv/mask-exts-not.ll | 20 +- llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll | 68 +- llvm/test/CodeGen/RISCV/rvv/memset-inline.ll | 92 +- llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll | 131 +- .../RISCV/rvv/mixed-float-bf16-arith.ll | 8 +- .../CodeGen/RISCV/rvv/mscatter-combine.ll | 20 +- .../test/CodeGen/RISCV/rvv/mscatter-sdnode.ll | 159 +- .../RISCV/rvv/mutate-prior-vsetvli-avl.ll | 8 +- .../RISCV/rvv/named-vector-shuffle-reverse.ll | 379 +- llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll | 454 +- .../RISCV/rvv/nontemporal-vp-scalable.ll | 4874 +++-- llvm/test/CodeGen/RISCV/rvv/pr104480.ll | 52 +- llvm/test/CodeGen/RISCV/rvv/pr125306.ll | 
96 +- llvm/test/CodeGen/RISCV/rvv/pr165232.ll | 294 +- llvm/test/CodeGen/RISCV/rvv/pr52475.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/pr61561.ll | 23 +- llvm/test/CodeGen/RISCV/rvv/pr83017.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/pr88576.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/pr90559.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/pr95865.ll | 14 +- .../rvv/regcoal-liveinterval-pruning-crash.ll | 11 +- llvm/test/CodeGen/RISCV/rvv/remat.ll | 74 +- .../CodeGen/RISCV/rvv/reproducer-pr146855.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/rint-vp.ll | 446 +- .../RISCV/rvv/riscv-codegenprepare-asm.ll | 24 +- llvm/test/CodeGen/RISCV/rvv/round-vp.ll | 446 +- llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll | 446 +- llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll | 446 +- .../test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll | 8 +- .../test/CodeGen/RISCV/rvv/rvv-framelayout.ll | 6 +- .../RISCV/rvv/rvv-peephole-vmerge-vops.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll | 548 +- llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll | 118 +- llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll | 6 +- .../CodeGen/RISCV/rvv/sink-splat-operands.ll | 146 +- llvm/test/CodeGen/RISCV/rvv/smulo-sdnode.ll | 120 +- .../rvv/splat-vector-split-i64-vl-sdnode.ll | 104 +- .../CodeGen/RISCV/rvv/splats-with-mixed-vl.ll | 27 +- llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/stack-folding.ll | 24 +- .../RISCV/rvv/stack-probing-dynamic.ll | 4 +- .../CodeGen/RISCV/rvv/stack-slot-coloring.ll | 100 +- llvm/test/CodeGen/RISCV/rvv/stepvector.ll | 112 +- .../RISCV/rvv/stores-of-loads-merging.ll | 6 +- llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll | 33 +- .../test/CodeGen/RISCV/rvv/strided-vpstore.ll | 60 +- .../RISCV/rvv/trunc-sat-clip-sdnode.ll | 32 +- llvm/test/CodeGen/RISCV/rvv/umulo-sdnode.ll | 104 +- .../RISCV/rvv/undef-earlyclobber-chain.ll | 32 +- llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll | 16 +- 
llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll | 52 +- llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll | 34 +- llvm/test/CodeGen/RISCV/rvv/vandn-vp.ll | 16 +- .../CodeGen/RISCV/rvv/vcpop-shl-zext-opt.ll | 40 +- llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll | 56 +- llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll | 32 +- llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll | 2 +- .../CodeGen/RISCV/rvv/vec3-setcc-crash.ll | 12 +- .../RISCV/rvv/vector-deinterleave-fixed.ll | 333 +- .../RISCV/rvv/vector-deinterleave-load.ll | 59 +- .../CodeGen/RISCV/rvv/vector-deinterleave.ll | 349 +- .../RISCV/rvv/vector-extract-last-active.ll | 16 +- .../RISCV/rvv/vector-interleave-fixed.ll | 1245 +- .../RISCV/rvv/vector-interleave-store.ll | 24 +- .../CodeGen/RISCV/rvv/vector-interleave.ll | 7500 +++---- llvm/test/CodeGen/RISCV/rvv/vector-splice.ll | 394 +- llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll | 12 +- .../RISCV/rvv/vfadd-constrained-sdnode.ll | 90 +- llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll | 182 +- llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll | 590 +- .../RISCV/rvv/vfcmp-constrained-sdnode.ll | 312 +- .../RISCV/rvv/vfcmps-constrained-sdnode.ll | 150 +- .../CodeGen/RISCV/rvv/vfcopysign-sdnode.ll | 286 +- .../RISCV/rvv/vfdiv-constrained-sdnode.ll | 98 +- llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll | 122 +- llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll | 236 +- .../RISCV/rvv/vfirst-byte-compare-index.ll | 2 +- .../test/CodeGen/RISCV/rvv/vfma-vp-combine.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll | 1 + .../RISCV/rvv/vfmadd-constrained-sdnode.ll | 308 +- llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll | 457 +- llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll | 90 +- llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll | 144 +- llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll | 90 +- llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll | 144 +- .../RISCV/rvv/vfmsub-constrained-sdnode.ll | 201 +- .../RISCV/rvv/vfmul-constrained-sdnode.ll | 90 +- 
llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll | 90 +- llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll | 118 +- llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll | 152 +- llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll | 12 +- .../RISCV/rvv/vfnmadd-constrained-sdnode.ll | 104 +- .../RISCV/rvv/vfnmsub-constrained-sdnode.ll | 121 +- llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll | 96 +- llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll | 32 +- llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll | 32 +- llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll | 55 +- .../RISCV/rvv/vfsqrt-constrained-sdnode.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll | 40 +- .../RISCV/rvv/vfsub-constrained-sdnode.ll | 90 +- llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll | 90 +- llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll | 236 +- llvm/test/CodeGen/RISCV/rvv/vfwmacc-sdnode.ll | 60 +- llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll | 14 +- llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll | 18 +- llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll | 18 +- llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll | 40 +- .../RISCV/rvv/vl-opt-evl-tail-folding.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 32 +- llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vl-opt.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vle_vid-vfcvt.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll | 54 +- llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll | 54 +- llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll | 54 +- llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll | 10 +- llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll | 54 +- llvm/test/CodeGen/RISCV/rvv/vmseq.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vmsge.ll | 20 +- llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll | 20 +- llvm/test/CodeGen/RISCV/rvv/vmsgt.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vmsle.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vmsleu.ll | 12 +- 
llvm/test/CodeGen/RISCV/rvv/vmslt.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vmsltu.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vmsne.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vmv.s.x.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll | 2 +- .../RISCV/rvv/vp-combine-reverse-load.ll | 12 +- .../RISCV/rvv/vp-combine-store-reverse.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vp-cttz-elts.ll | 10 +- llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll | 34 +- .../rvv/vp-reverse-mask-fixed-vectors.ll | 24 +- .../test/CodeGen/RISCV/rvv/vp-reverse-mask.ll | 52 +- .../RISCV/rvv/vp-splice-mask-fixed-vectors.ll | 32 +- .../RISCV/rvv/vp-splice-mask-vectors.ll | 118 +- llvm/test/CodeGen/RISCV/rvv/vp-splice.ll | 70 +- llvm/test/CodeGen/RISCV/rvv/vp-vaaddu.ll | 2 +- .../RISCV/rvv/vp-vector-interleaved-access.ll | 72 +- .../test/CodeGen/RISCV/rvv/vpgather-sdnode.ll | 143 +- llvm/test/CodeGen/RISCV/rvv/vpload.ll | 40 +- llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll | 121 +- .../CodeGen/RISCV/rvv/vpscatter-sdnode.ll | 126 +- llvm/test/CodeGen/RISCV/rvv/vpstore.ll | 42 +- .../RISCV/rvv/vreductions-fp-sdnode.ll | 22 +- .../RISCV/rvv/vreductions-fp-vp-bf16.ll | 20 +- .../RISCV/rvv/vreductions-fp-vp-f16.ll | 40 +- .../CodeGen/RISCV/rvv/vreductions-fp-vp.ll | 36 +- .../CodeGen/RISCV/rvv/vreductions-int-vp.ll | 184 +- .../test/CodeGen/RISCV/rvv/vreductions-int.ll | 180 +- .../CodeGen/RISCV/rvv/vreductions-mask-vp.ll | 2 +- .../CodeGen/RISCV/rvv/vreductions-mask.ll | 154 +- llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll | 56 +- llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll | 40 +- llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vrol-sdnode.ll | 254 +- llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll | 254 +- llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll | 34 +- llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll | 36 +- .../CodeGen/RISCV/rvv/vscale-power-of-two.ll | 2 
+- .../RISCV/rvv/vscale-vw-web-simplification.ll | 264 +- llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll | 11 +- llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll | 86 +- .../RISCV/rvv/vsetvli-insert-crossbb.ll | 40 +- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll | 48 +- llvm/test/CodeGen/RISCV/rvv/vsplats-i64.ll | 10 +- llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll | 36 +- llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll | 38 +- llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll | 552 +- llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll | 87 +- llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll | 48 +- .../CodeGen/RISCV/rvv/vwadd-mask-sdnode.ll | 20 +- llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vwadd-vp.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll | 12 +- .../CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll | 2 +- .../RISCV/rvv/vxrm-insert-out-of-loop.ll | 128 +- llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll | 16 +- .../RISCV/rvv/wrong-chain-fixed-load.ll | 4 +- .../CodeGen/RISCV/rvv/zvbb-demanded-bits.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/zvqdotq-sdnode.ll | 347 +- llvm/test/CodeGen/RISCV/sadd_sat.ll | 42 +- llvm/test/CodeGen/RISCV/sadd_sat_plus.ll | 76 +- llvm/test/CodeGen/RISCV/scmp.ll | 2 +- .../CodeGen/RISCV/select-binop-identity.ll | 4 +- llvm/test/CodeGen/RISCV/select-cc.ll | 112 +- llvm/test/CodeGen/RISCV/select-const.ll | 24 +- .../test/CodeGen/RISCV/select-constant-xor.ll | 16 +- llvm/test/CodeGen/RISCV/select-zbb.ll | 68 +- llvm/test/CodeGen/RISCV/select.ll | 84 +- llvm/test/CodeGen/RISCV/sextw-removal.ll | 172 +- llvm/test/CodeGen/RISCV/shift-amount-mod.ll | 38 +- llvm/test/CodeGen/RISCV/shift-and.ll | 2 +- 
llvm/test/CodeGen/RISCV/shift-masked-shamt.ll | 6 +- llvm/test/CodeGen/RISCV/shifts.ll | 484 +- llvm/test/CodeGen/RISCV/shl-cttz.ll | 156 +- llvm/test/CodeGen/RISCV/shlimm-addimm.ll | 104 +- .../RISCV/short-forward-branch-opt-mul.ll | 8 +- .../CodeGen/RISCV/signed-truncation-check.ll | 100 +- llvm/test/CodeGen/RISCV/split-offsets.ll | 16 +- llvm/test/CodeGen/RISCV/split-store.ll | 4 +- .../CodeGen/RISCV/split-udiv-by-constant.ll | 509 +- .../CodeGen/RISCV/split-urem-by-constant.ll | 128 +- llvm/test/CodeGen/RISCV/srem-lkk.ll | 48 +- .../CodeGen/RISCV/srem-seteq-illegal-types.ll | 550 +- llvm/test/CodeGen/RISCV/srem-vector-lkk.ll | 658 +- llvm/test/CodeGen/RISCV/ssub_sat.ll | 44 +- llvm/test/CodeGen/RISCV/ssub_sat_plus.ll | 80 +- .../CodeGen/RISCV/stack-clash-prologue.ll | 10 +- llvm/test/CodeGen/RISCV/stack-store-check.ll | 16 +- llvm/test/CodeGen/RISCV/tail-calls.ll | 10 +- llvm/test/CodeGen/RISCV/trunc-nsw-nuw.ll | 2 +- .../CodeGen/RISCV/typepromotion-overflow.ll | 2 +- llvm/test/CodeGen/RISCV/uadd_sat.ll | 24 +- llvm/test/CodeGen/RISCV/uadd_sat_plus.ll | 38 +- llvm/test/CodeGen/RISCV/ucmp.ll | 2 +- .../RISCV/umulo-128-legalisation-lowering.ll | 144 +- .../CodeGen/RISCV/unaligned-load-store.ll | 262 +- ...unfold-masked-merge-scalar-variablemask.ll | 10 +- llvm/test/CodeGen/RISCV/urem-lkk.ll | 34 +- .../CodeGen/RISCV/urem-seteq-illegal-types.ll | 524 +- llvm/test/CodeGen/RISCV/urem-vector-lkk.ll | 406 +- llvm/test/CodeGen/RISCV/usub_sat_plus.ll | 38 +- llvm/test/CodeGen/RISCV/vararg-ilp32e.ll | 16 +- llvm/test/CodeGen/RISCV/vararg.ll | 1120 +- .../RISCV/varargs-with-fp-and-second-adj.ll | 4 +- ...lar-shift-by-byte-multiple-legalization.ll | 8930 ++++----- .../RISCV/wide-scalar-shift-legalization.ll | 3908 ++-- llvm/test/CodeGen/RISCV/xaluo.ll | 1780 +- .../CodeGen/RISCV/xqccmp-additional-stack.ll | 13 +- .../CodeGen/RISCV/xqccmp-callee-saved-gprs.ll | 164 +- .../CodeGen/RISCV/xqccmp-push-pop-popret.ll | 698 +- llvm/test/CodeGen/RISCV/xqcia.ll | 8 +- 
llvm/test/CodeGen/RISCV/xqciac.ll | 36 +- .../test/CodeGen/RISCV/xqcibm-cto-clo-brev.ll | 528 +- llvm/test/CodeGen/RISCV/xqcibm-insbi.ll | 14 +- llvm/test/CodeGen/RISCV/xqcibm-insert.ll | 26 +- llvm/test/CodeGen/RISCV/xqcilia.ll | 16 +- llvm/test/CodeGen/RISCV/xqcilsm-memset.ll | 38 +- llvm/test/CodeGen/RISCV/xqcisls.ll | 24 +- llvm/test/CodeGen/RISCV/xtheadmac.ll | 22 +- llvm/test/CodeGen/RISCV/xtheadmemidx.ll | 62 +- llvm/test/CodeGen/RISCV/xtheadmempair.ll | 14 +- llvm/test/CodeGen/RISCV/zbb-logic-neg-imm.ll | 12 +- .../CodeGen/RISCV/zcmp-additional-stack.ll | 13 +- .../CodeGen/RISCV/zdinx-boundary-check.ll | 68 +- llvm/test/CodeGen/RISCV/zicond-opts.ll | 22 +- llvm/test/CodeGen/RISCV/zilsd.ll | 48 +- 726 files changed, 100768 insertions(+), 100306 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index 9e31d08ae2243..27f1ce0f393e0 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -88,22 +88,22 @@ class RISCVTuneProcessorModel, GenericTuneInfo; def GENERIC_RV64 : RISCVProcessorModel<"generic-rv64", - NoSchedModel, + SpacemitX60Model, [Feature64Bit, FeatureStdExtI], GenericTuneFeatures>, GenericTuneInfo; // Support generic for compatibility with other targets. The triple will be used // to change to the appropriate rv32/rv64 version. -// `generic` is expected to target in-order application processors designed for -// general-purpose computing. 
def GENERIC : RISCVTuneProcessorModel<"generic", SpacemitX60Model>, GenericTuneInfo; // `generic-ooo` is expected to target out-of-order application processors designed diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/add-imm.ll b/llvm/test/CodeGen/RISCV/GlobalISel/add-imm.ll index 0fd23a7d346df..2686880a5836b 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/add-imm.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/add-imm.ll @@ -170,16 +170,16 @@ define signext i32 @add32_sext_accept(i32 signext %a) nounwind { define signext i32 @add32_sext_reject_on_rv64(i32 signext %a) nounwind { ; RV32I-LABEL: add32_sext_reject_on_rv64: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a1, %hi(gv0) ; RV32I-NEXT: addi a0, a0, 2047 +; RV32I-NEXT: lui a1, %hi(gv0) ; RV32I-NEXT: addi a0, a0, 953 ; RV32I-NEXT: sw a0, %lo(gv0)(a1) ; RV32I-NEXT: ret ; ; RV64I-LABEL: add32_sext_reject_on_rv64: ; RV64I: # %bb.0: -; RV64I-NEXT: lui a1, %hi(gv0) ; RV64I-NEXT: addi a0, a0, 2047 +; RV64I-NEXT: lui a1, %hi(gv0) ; RV64I-NEXT: addiw a0, a0, 953 ; RV64I-NEXT: sw a0, %lo(gv0)(a1) ; RV64I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll b/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll index 487cb5768dcad..24dc8ceaa6516 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll @@ -438,11 +438,10 @@ entry: define i64 @sub_i64(i64 %a, i64 %b) { ; RV32IM-LABEL: sub_i64: ; RV32IM: # %bb.0: # %entry -; RV32IM-NEXT: sub a4, a0, a2 -; RV32IM-NEXT: sltu a0, a0, a2 +; RV32IM-NEXT: sltu a4, a0, a2 ; RV32IM-NEXT: sub a1, a1, a3 -; RV32IM-NEXT: sub a1, a1, a0 -; RV32IM-NEXT: mv a0, a4 +; RV32IM-NEXT: sub a0, a0, a2 +; RV32IM-NEXT: sub a1, a1, a4 ; RV32IM-NEXT: ret ; ; RV64IM-LABEL: sub_i64: @@ -479,11 +478,10 @@ entry: define i64 @neg_i64(i64 %a) { ; RV32IM-LABEL: neg_i64: ; RV32IM: # %bb.0: # %entry -; RV32IM-NEXT: neg a2, a0 -; RV32IM-NEXT: snez a0, a0 +; RV32IM-NEXT: snez a2, a0 ; RV32IM-NEXT: neg a1, a1 -; RV32IM-NEXT: sub a1, a1, 
a0 -; RV32IM-NEXT: mv a0, a2 +; RV32IM-NEXT: neg a0, a0 +; RV32IM-NEXT: sub a1, a1, a2 ; RV32IM-NEXT: ret ; ; RV64IM-LABEL: neg_i64: @@ -594,13 +592,12 @@ entry: define i64 @mul_i64(i64 %a, i64 %b) { ; RV32IM-LABEL: mul_i64: ; RV32IM: # %bb.0: # %entry -; RV32IM-NEXT: mul a4, a0, a2 ; RV32IM-NEXT: mul a1, a1, a2 ; RV32IM-NEXT: mul a3, a0, a3 -; RV32IM-NEXT: mulhu a0, a0, a2 +; RV32IM-NEXT: mulhu a4, a0, a2 +; RV32IM-NEXT: mul a0, a0, a2 ; RV32IM-NEXT: add a1, a1, a3 -; RV32IM-NEXT: add a1, a1, a0 -; RV32IM-NEXT: mv a0, a4 +; RV32IM-NEXT: add a1, a1, a4 ; RV32IM-NEXT: ret ; ; RV64IM-LABEL: mul_i64: diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-cmpxchg.ll index 2fb9dcfeef785..7b59faeb0e8ee 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/atomic-cmpxchg.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-cmpxchg.ll @@ -43,10 +43,10 @@ define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: andi a4, a0, -4 ; RV32IA-NEXT: andi a0, a0, 3 -; RV32IA-NEXT: zext.b a1, a1 -; RV32IA-NEXT: zext.b a2, a2 ; RV32IA-NEXT: slli a0, a0, 3 +; RV32IA-NEXT: zext.b a1, a1 ; RV32IA-NEXT: sll a3, a3, a0 +; RV32IA-NEXT: zext.b a2, a2 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: sll a0, a2, a0 ; RV32IA-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 @@ -80,10 +80,10 @@ define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind ; RV64IA-WMO-NEXT: li a3, 255 ; RV64IA-WMO-NEXT: andi a4, a0, -4 ; RV64IA-WMO-NEXT: andi a0, a0, 3 -; RV64IA-WMO-NEXT: zext.b a1, a1 -; RV64IA-WMO-NEXT: zext.b a2, a2 ; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: zext.b a1, a1 ; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: zext.b a2, a2 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NEXT: sllw a0, a2, a0 ; RV64IA-WMO-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 @@ -104,10 +104,10 @@ define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, 
i8 %val) nounwind ; RV64IA-ZACAS-NEXT: li a3, 255 ; RV64IA-ZACAS-NEXT: andi a4, a0, -4 ; RV64IA-ZACAS-NEXT: andi a0, a0, 3 -; RV64IA-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 ; RV64IA-ZACAS-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 @@ -133,10 +133,10 @@ define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind ; RV64IA-TSO-NEXT: li a3, 255 ; RV64IA-TSO-NEXT: andi a4, a0, -4 ; RV64IA-TSO-NEXT: andi a0, a0, 3 -; RV64IA-TSO-NEXT: zext.b a1, a1 -; RV64IA-TSO-NEXT: zext.b a2, a2 ; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: zext.b a1, a1 ; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: zext.b a2, a2 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NEXT: sllw a0, a2, a0 ; RV64IA-TSO-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 @@ -174,10 +174,10 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-WMO-NEXT: li a3, 255 ; RV32IA-WMO-NEXT: andi a4, a0, -4 ; RV32IA-WMO-NEXT: andi a0, a0, 3 -; RV32IA-WMO-NEXT: zext.b a1, a1 -; RV32IA-WMO-NEXT: zext.b a2, a2 ; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: zext.b a1, a1 ; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: zext.b a2, a2 ; RV32IA-WMO-NEXT: sll a1, a1, a0 ; RV32IA-WMO-NEXT: sll a0, a2, a0 ; RV32IA-WMO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 @@ -198,10 +198,10 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-WMO-ZACAS-NEXT: li a3, 255 ; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 +; 
RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 ; RV32IA-WMO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 @@ -222,10 +222,10 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-TSO-NEXT: li a3, 255 ; RV32IA-TSO-NEXT: andi a4, a0, -4 ; RV32IA-TSO-NEXT: andi a0, a0, 3 -; RV32IA-TSO-NEXT: zext.b a1, a1 -; RV32IA-TSO-NEXT: zext.b a2, a2 ; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: zext.b a1, a1 ; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: zext.b a2, a2 ; RV32IA-TSO-NEXT: sll a1, a1, a0 ; RV32IA-TSO-NEXT: sll a0, a2, a0 ; RV32IA-TSO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 @@ -246,10 +246,10 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-TSO-ZACAS-NEXT: li a3, 255 ; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4 ; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 ; RV32IA-TSO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 @@ -283,10 +283,10 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-WMO-NEXT: li a3, 255 ; RV64IA-WMO-NEXT: andi a4, a0, -4 ; RV64IA-WMO-NEXT: andi a0, a0, 3 -; RV64IA-WMO-NEXT: zext.b a1, a1 -; RV64IA-WMO-NEXT: zext.b a2, a2 ; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: zext.b a1, a1 ; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: zext.b a2, a2 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NEXT: sllw a0, a2, a0 ; RV64IA-WMO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 @@ -307,10 +307,10 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; 
RV64IA-WMO-ZACAS-NEXT: li a3, 255 ; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4 ; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 ; RV64IA-WMO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 @@ -336,10 +336,10 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-TSO-NEXT: li a3, 255 ; RV64IA-TSO-NEXT: andi a4, a0, -4 ; RV64IA-TSO-NEXT: andi a0, a0, 3 -; RV64IA-TSO-NEXT: zext.b a1, a1 -; RV64IA-TSO-NEXT: zext.b a2, a2 ; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: zext.b a1, a1 ; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: zext.b a2, a2 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NEXT: sllw a0, a2, a0 ; RV64IA-TSO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 @@ -360,10 +360,10 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-TSO-ZACAS-NEXT: li a3, 255 ; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 ; RV64IA-TSO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 @@ -406,10 +406,10 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-WMO-NEXT: li a3, 255 ; RV32IA-WMO-NEXT: andi a4, a0, -4 ; RV32IA-WMO-NEXT: andi a0, a0, 3 -; RV32IA-WMO-NEXT: zext.b a1, a1 -; RV32IA-WMO-NEXT: zext.b a2, a2 ; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: zext.b a1, a1 ; 
RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: zext.b a2, a2 ; RV32IA-WMO-NEXT: sll a1, a1, a0 ; RV32IA-WMO-NEXT: sll a0, a2, a0 ; RV32IA-WMO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 @@ -430,10 +430,10 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-WMO-ZACAS-NEXT: li a3, 255 ; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 ; RV32IA-WMO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 @@ -454,10 +454,10 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-TSO-NEXT: li a3, 255 ; RV32IA-TSO-NEXT: andi a4, a0, -4 ; RV32IA-TSO-NEXT: andi a0, a0, 3 -; RV32IA-TSO-NEXT: zext.b a1, a1 -; RV32IA-TSO-NEXT: zext.b a2, a2 ; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: zext.b a1, a1 ; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: zext.b a2, a2 ; RV32IA-TSO-NEXT: sll a1, a1, a0 ; RV32IA-TSO-NEXT: sll a0, a2, a0 ; RV32IA-TSO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 @@ -478,10 +478,10 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-TSO-ZACAS-NEXT: li a3, 255 ; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4 ; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 ; RV32IA-TSO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 @@ -515,10 +515,10 @@ define void 
@cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-WMO-NEXT: li a3, 255 ; RV64IA-WMO-NEXT: andi a4, a0, -4 ; RV64IA-WMO-NEXT: andi a0, a0, 3 -; RV64IA-WMO-NEXT: zext.b a1, a1 -; RV64IA-WMO-NEXT: zext.b a2, a2 ; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: zext.b a1, a1 ; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: zext.b a2, a2 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NEXT: sllw a0, a2, a0 ; RV64IA-WMO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 @@ -539,10 +539,10 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-WMO-ZACAS-NEXT: li a3, 255 ; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4 ; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 ; RV64IA-WMO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 @@ -568,10 +568,10 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-TSO-NEXT: li a3, 255 ; RV64IA-TSO-NEXT: andi a4, a0, -4 ; RV64IA-TSO-NEXT: andi a0, a0, 3 -; RV64IA-TSO-NEXT: zext.b a1, a1 -; RV64IA-TSO-NEXT: zext.b a2, a2 ; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: zext.b a1, a1 ; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: zext.b a2, a2 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NEXT: sllw a0, a2, a0 ; RV64IA-TSO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 @@ -592,10 +592,10 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-TSO-ZACAS-NEXT: li a3, 255 ; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; 
RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 ; RV64IA-TSO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 @@ -638,10 +638,10 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-WMO-NEXT: li a3, 255 ; RV32IA-WMO-NEXT: andi a4, a0, -4 ; RV32IA-WMO-NEXT: andi a0, a0, 3 -; RV32IA-WMO-NEXT: zext.b a1, a1 -; RV32IA-WMO-NEXT: zext.b a2, a2 ; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: zext.b a1, a1 ; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: zext.b a2, a2 ; RV32IA-WMO-NEXT: sll a1, a1, a0 ; RV32IA-WMO-NEXT: sll a0, a2, a0 ; RV32IA-WMO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 @@ -662,10 +662,10 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-WMO-ZACAS-NEXT: li a3, 255 ; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 ; RV32IA-WMO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 @@ -686,10 +686,10 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-TSO-NEXT: li a3, 255 ; RV32IA-TSO-NEXT: andi a4, a0, -4 ; RV32IA-TSO-NEXT: andi a0, a0, 3 -; RV32IA-TSO-NEXT: zext.b a1, a1 -; RV32IA-TSO-NEXT: zext.b a2, a2 ; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: zext.b a1, a1 ; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: zext.b a2, a2 ; RV32IA-TSO-NEXT: sll a1, a1, a0 ; RV32IA-TSO-NEXT: sll a0, a2, a0 ; RV32IA-TSO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 @@ -710,10 +710,10 @@ define void 
@cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-TSO-ZACAS-NEXT: li a3, 255 ; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4 ; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 ; RV32IA-TSO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 @@ -747,10 +747,10 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-WMO-NEXT: li a3, 255 ; RV64IA-WMO-NEXT: andi a4, a0, -4 ; RV64IA-WMO-NEXT: andi a0, a0, 3 -; RV64IA-WMO-NEXT: zext.b a1, a1 -; RV64IA-WMO-NEXT: zext.b a2, a2 ; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: zext.b a1, a1 ; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: zext.b a2, a2 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NEXT: sllw a0, a2, a0 ; RV64IA-WMO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 @@ -771,10 +771,10 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-WMO-ZACAS-NEXT: li a3, 255 ; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4 ; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 ; RV64IA-WMO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 @@ -800,10 +800,10 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-TSO-NEXT: li a3, 255 ; RV64IA-TSO-NEXT: andi a4, a0, -4 ; RV64IA-TSO-NEXT: andi a0, a0, 3 -; RV64IA-TSO-NEXT: zext.b a1, a1 -; RV64IA-TSO-NEXT: zext.b a2, a2 
; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: zext.b a1, a1 ; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: zext.b a2, a2 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NEXT: sllw a0, a2, a0 ; RV64IA-TSO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 @@ -824,10 +824,10 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-TSO-ZACAS-NEXT: li a3, 255 ; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 ; RV64IA-TSO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 @@ -870,10 +870,10 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-WMO-NEXT: li a3, 255 ; RV32IA-WMO-NEXT: andi a4, a0, -4 ; RV32IA-WMO-NEXT: andi a0, a0, 3 -; RV32IA-WMO-NEXT: zext.b a1, a1 -; RV32IA-WMO-NEXT: zext.b a2, a2 ; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: zext.b a1, a1 ; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: zext.b a2, a2 ; RV32IA-WMO-NEXT: sll a1, a1, a0 ; RV32IA-WMO-NEXT: sll a0, a2, a0 ; RV32IA-WMO-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 @@ -894,10 +894,10 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-WMO-ZACAS-NEXT: li a3, 255 ; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 ; RV32IA-WMO-ZACAS-NEXT: .LBB4_1: # 
=>This Inner Loop Header: Depth=1 @@ -918,10 +918,10 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-TSO-NEXT: li a3, 255 ; RV32IA-TSO-NEXT: andi a4, a0, -4 ; RV32IA-TSO-NEXT: andi a0, a0, 3 -; RV32IA-TSO-NEXT: zext.b a1, a1 -; RV32IA-TSO-NEXT: zext.b a2, a2 ; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: zext.b a1, a1 ; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: zext.b a2, a2 ; RV32IA-TSO-NEXT: sll a1, a1, a0 ; RV32IA-TSO-NEXT: sll a0, a2, a0 ; RV32IA-TSO-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 @@ -942,10 +942,10 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-TSO-ZACAS-NEXT: li a3, 255 ; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4 ; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 ; RV32IA-TSO-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 @@ -979,10 +979,10 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-WMO-NEXT: li a3, 255 ; RV64IA-WMO-NEXT: andi a4, a0, -4 ; RV64IA-WMO-NEXT: andi a0, a0, 3 -; RV64IA-WMO-NEXT: zext.b a1, a1 -; RV64IA-WMO-NEXT: zext.b a2, a2 ; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: zext.b a1, a1 ; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: zext.b a2, a2 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NEXT: sllw a0, a2, a0 ; RV64IA-WMO-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 @@ -1003,10 +1003,10 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-WMO-ZACAS-NEXT: li a3, 255 ; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4 ; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; 
RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 ; RV64IA-WMO-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 @@ -1032,10 +1032,10 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-TSO-NEXT: li a3, 255 ; RV64IA-TSO-NEXT: andi a4, a0, -4 ; RV64IA-TSO-NEXT: andi a0, a0, 3 -; RV64IA-TSO-NEXT: zext.b a1, a1 -; RV64IA-TSO-NEXT: zext.b a2, a2 ; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: zext.b a1, a1 ; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: zext.b a2, a2 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NEXT: sllw a0, a2, a0 ; RV64IA-TSO-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 @@ -1056,10 +1056,10 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-TSO-ZACAS-NEXT: li a3, 255 ; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 ; RV64IA-TSO-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 @@ -1102,10 +1102,10 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-WMO-NEXT: li a3, 255 ; RV32IA-WMO-NEXT: andi a4, a0, -4 ; RV32IA-WMO-NEXT: andi a0, a0, 3 -; RV32IA-WMO-NEXT: zext.b a1, a1 -; RV32IA-WMO-NEXT: zext.b a2, a2 ; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: zext.b a1, a1 ; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: zext.b a2, a2 ; RV32IA-WMO-NEXT: sll a1, a1, a0 ; RV32IA-WMO-NEXT: sll a0, a2, a0 ; RV32IA-WMO-NEXT: 
.LBB5_1: # =>This Inner Loop Header: Depth=1 @@ -1126,10 +1126,10 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-WMO-ZACAS-NEXT: li a3, 255 ; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 ; RV32IA-WMO-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 @@ -1150,10 +1150,10 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-TSO-NEXT: li a3, 255 ; RV32IA-TSO-NEXT: andi a4, a0, -4 ; RV32IA-TSO-NEXT: andi a0, a0, 3 -; RV32IA-TSO-NEXT: zext.b a1, a1 -; RV32IA-TSO-NEXT: zext.b a2, a2 ; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: zext.b a1, a1 ; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: zext.b a2, a2 ; RV32IA-TSO-NEXT: sll a1, a1, a0 ; RV32IA-TSO-NEXT: sll a0, a2, a0 ; RV32IA-TSO-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 @@ -1174,10 +1174,10 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-TSO-ZACAS-NEXT: li a3, 255 ; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4 ; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 ; RV32IA-TSO-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 @@ -1211,10 +1211,10 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-WMO-NEXT: li a3, 255 ; RV64IA-WMO-NEXT: andi a4, a0, -4 ; RV64IA-WMO-NEXT: 
andi a0, a0, 3 -; RV64IA-WMO-NEXT: zext.b a1, a1 -; RV64IA-WMO-NEXT: zext.b a2, a2 ; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: zext.b a1, a1 ; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: zext.b a2, a2 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NEXT: sllw a0, a2, a0 ; RV64IA-WMO-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 @@ -1235,10 +1235,10 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-WMO-ZACAS-NEXT: li a3, 255 ; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4 ; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 ; RV64IA-WMO-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 @@ -1264,10 +1264,10 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-TSO-NEXT: li a3, 255 ; RV64IA-TSO-NEXT: andi a4, a0, -4 ; RV64IA-TSO-NEXT: andi a0, a0, 3 -; RV64IA-TSO-NEXT: zext.b a1, a1 -; RV64IA-TSO-NEXT: zext.b a2, a2 ; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: zext.b a1, a1 ; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: zext.b a2, a2 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NEXT: sllw a0, a2, a0 ; RV64IA-TSO-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 @@ -1288,10 +1288,10 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-TSO-ZACAS-NEXT: li a3, 255 ; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-TSO-ZACAS-NEXT: 
sllw a1, a1, a0 ; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 ; RV64IA-TSO-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 @@ -1334,10 +1334,10 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-WMO-NEXT: li a3, 255 ; RV32IA-WMO-NEXT: andi a4, a0, -4 ; RV32IA-WMO-NEXT: andi a0, a0, 3 -; RV32IA-WMO-NEXT: zext.b a1, a1 -; RV32IA-WMO-NEXT: zext.b a2, a2 ; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: zext.b a1, a1 ; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: zext.b a2, a2 ; RV32IA-WMO-NEXT: sll a1, a1, a0 ; RV32IA-WMO-NEXT: sll a0, a2, a0 ; RV32IA-WMO-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 @@ -1358,10 +1358,10 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-WMO-ZACAS-NEXT: li a3, 255 ; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 ; RV32IA-WMO-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 @@ -1382,10 +1382,10 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-TSO-NEXT: li a3, 255 ; RV32IA-TSO-NEXT: andi a4, a0, -4 ; RV32IA-TSO-NEXT: andi a0, a0, 3 -; RV32IA-TSO-NEXT: zext.b a1, a1 -; RV32IA-TSO-NEXT: zext.b a2, a2 ; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: zext.b a1, a1 ; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: zext.b a2, a2 ; RV32IA-TSO-NEXT: sll a1, a1, a0 ; RV32IA-TSO-NEXT: sll a0, a2, a0 ; RV32IA-TSO-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 @@ -1406,10 +1406,10 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-TSO-ZACAS-NEXT: li a3, 255 ; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4 ; 
RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 ; RV32IA-TSO-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 @@ -1443,10 +1443,10 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-WMO-NEXT: li a3, 255 ; RV64IA-WMO-NEXT: andi a4, a0, -4 ; RV64IA-WMO-NEXT: andi a0, a0, 3 -; RV64IA-WMO-NEXT: zext.b a1, a1 -; RV64IA-WMO-NEXT: zext.b a2, a2 ; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: zext.b a1, a1 ; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: zext.b a2, a2 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NEXT: sllw a0, a2, a0 ; RV64IA-WMO-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 @@ -1467,10 +1467,10 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-WMO-ZACAS-NEXT: li a3, 255 ; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4 ; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 ; RV64IA-WMO-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 @@ -1496,10 +1496,10 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-TSO-NEXT: li a3, 255 ; RV64IA-TSO-NEXT: andi a4, a0, -4 ; RV64IA-TSO-NEXT: andi a0, a0, 3 -; RV64IA-TSO-NEXT: zext.b a1, a1 -; RV64IA-TSO-NEXT: zext.b a2, a2 ; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: zext.b a1, a1 ; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: zext.b a2, a2 ; 
RV64IA-TSO-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NEXT: sllw a0, a2, a0 ; RV64IA-TSO-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 @@ -1520,10 +1520,10 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-TSO-ZACAS-NEXT: li a3, 255 ; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 ; RV64IA-TSO-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 @@ -1566,10 +1566,10 @@ define void @cmpxchg_i8_seq_cst_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: andi a4, a0, -4 ; RV32IA-NEXT: andi a0, a0, 3 -; RV32IA-NEXT: zext.b a1, a1 -; RV32IA-NEXT: zext.b a2, a2 ; RV32IA-NEXT: slli a0, a0, 3 +; RV32IA-NEXT: zext.b a1, a1 ; RV32IA-NEXT: sll a3, a3, a0 +; RV32IA-NEXT: zext.b a2, a2 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: sll a0, a2, a0 ; RV32IA-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 @@ -1603,10 +1603,10 @@ define void @cmpxchg_i8_seq_cst_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-WMO-NEXT: li a3, 255 ; RV64IA-WMO-NEXT: andi a4, a0, -4 ; RV64IA-WMO-NEXT: andi a0, a0, 3 -; RV64IA-WMO-NEXT: zext.b a1, a1 -; RV64IA-WMO-NEXT: zext.b a2, a2 ; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: zext.b a1, a1 ; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: zext.b a2, a2 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NEXT: sllw a0, a2, a0 ; RV64IA-WMO-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 @@ -1627,10 +1627,10 @@ define void @cmpxchg_i8_seq_cst_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-ZACAS-NEXT: li a3, 255 ; RV64IA-ZACAS-NEXT: andi a4, a0, -4 ; RV64IA-ZACAS-NEXT: andi a0, a0, 3 -; 
RV64IA-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 ; RV64IA-ZACAS-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 @@ -1656,10 +1656,10 @@ define void @cmpxchg_i8_seq_cst_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-TSO-NEXT: li a3, 255 ; RV64IA-TSO-NEXT: andi a4, a0, -4 ; RV64IA-TSO-NEXT: andi a0, a0, 3 -; RV64IA-TSO-NEXT: zext.b a1, a1 -; RV64IA-TSO-NEXT: zext.b a2, a2 ; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: zext.b a1, a1 ; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: zext.b a2, a2 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NEXT: sllw a0, a2, a0 ; RV64IA-TSO-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 @@ -1702,10 +1702,10 @@ define void @cmpxchg_i8_seq_cst_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: andi a4, a0, -4 ; RV32IA-NEXT: andi a0, a0, 3 -; RV32IA-NEXT: zext.b a1, a1 -; RV32IA-NEXT: zext.b a2, a2 ; RV32IA-NEXT: slli a0, a0, 3 +; RV32IA-NEXT: zext.b a1, a1 ; RV32IA-NEXT: sll a3, a3, a0 +; RV32IA-NEXT: zext.b a2, a2 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: sll a0, a2, a0 ; RV32IA-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 @@ -1739,10 +1739,10 @@ define void @cmpxchg_i8_seq_cst_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-WMO-NEXT: li a3, 255 ; RV64IA-WMO-NEXT: andi a4, a0, -4 ; RV64IA-WMO-NEXT: andi a0, a0, 3 -; RV64IA-WMO-NEXT: zext.b a1, a1 -; RV64IA-WMO-NEXT: zext.b a2, a2 ; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: zext.b a1, a1 ; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: zext.b a2, a2 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NEXT: sllw a0, a2, a0 ; RV64IA-WMO-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 @@ -1763,10 +1763,10 @@ define void @cmpxchg_i8_seq_cst_acquire(ptr 
%ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-ZACAS-NEXT: li a3, 255 ; RV64IA-ZACAS-NEXT: andi a4, a0, -4 ; RV64IA-ZACAS-NEXT: andi a0, a0, 3 -; RV64IA-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 ; RV64IA-ZACAS-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 @@ -1792,10 +1792,10 @@ define void @cmpxchg_i8_seq_cst_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-TSO-NEXT: li a3, 255 ; RV64IA-TSO-NEXT: andi a4, a0, -4 ; RV64IA-TSO-NEXT: andi a0, a0, 3 -; RV64IA-TSO-NEXT: zext.b a1, a1 -; RV64IA-TSO-NEXT: zext.b a2, a2 ; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: zext.b a1, a1 ; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: zext.b a2, a2 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NEXT: sllw a0, a2, a0 ; RV64IA-TSO-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 @@ -1838,10 +1838,10 @@ define void @cmpxchg_i8_seq_cst_seq_cst(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: andi a4, a0, -4 ; RV32IA-NEXT: andi a0, a0, 3 -; RV32IA-NEXT: zext.b a1, a1 -; RV32IA-NEXT: zext.b a2, a2 ; RV32IA-NEXT: slli a0, a0, 3 +; RV32IA-NEXT: zext.b a1, a1 ; RV32IA-NEXT: sll a3, a3, a0 +; RV32IA-NEXT: zext.b a2, a2 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: sll a0, a2, a0 ; RV32IA-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 @@ -1875,10 +1875,10 @@ define void @cmpxchg_i8_seq_cst_seq_cst(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-WMO-NEXT: li a3, 255 ; RV64IA-WMO-NEXT: andi a4, a0, -4 ; RV64IA-WMO-NEXT: andi a0, a0, 3 -; RV64IA-WMO-NEXT: zext.b a1, a1 -; RV64IA-WMO-NEXT: zext.b a2, a2 ; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: zext.b a1, a1 ; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: zext.b a2, a2 ; RV64IA-WMO-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NEXT: sllw a0, 
a2, a0 ; RV64IA-WMO-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 @@ -1899,10 +1899,10 @@ define void @cmpxchg_i8_seq_cst_seq_cst(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-ZACAS-NEXT: li a3, 255 ; RV64IA-ZACAS-NEXT: andi a4, a0, -4 ; RV64IA-ZACAS-NEXT: andi a0, a0, 3 -; RV64IA-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-ZACAS-NEXT: zext.b a2, a2 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 ; RV64IA-ZACAS-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 @@ -1929,10 +1929,10 @@ define void @cmpxchg_i8_seq_cst_seq_cst(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; RV64IA-TSO-NEXT: li a3, 255 ; RV64IA-TSO-NEXT: andi a4, a0, -4 ; RV64IA-TSO-NEXT: andi a0, a0, 3 -; RV64IA-TSO-NEXT: zext.b a1, a1 -; RV64IA-TSO-NEXT: zext.b a2, a2 ; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: zext.b a1, a1 ; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: zext.b a2, a2 ; RV64IA-TSO-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NEXT: sllw a0, a2, a0 ; RV64IA-TSO-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 @@ -1974,25 +1974,25 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw ; RV32IA-LABEL: cmpxchg_i16_monotonic_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: andi a4, a0, -4 -; RV32IA-NEXT: andi a0, a0, 3 +; RV32IA-NEXT: andi a4, a0, 3 ; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: sll a5, a3, a0 +; RV32IA-NEXT: slli a4, a4, 3 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: sll a5, a3, a4 ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: and a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: sll a0, a2, a0 +; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: sll a2, a2, a4 ; RV32IA-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a2, (a4) -; RV32IA-NEXT: and a3, a2, a5 -; RV32IA-NEXT: bne a3, a1, .LBB10_3 
+; RV32IA-NEXT: lr.w a3, (a0) +; RV32IA-NEXT: and a4, a3, a5 +; RV32IA-NEXT: bne a4, a1, .LBB10_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 -; RV32IA-NEXT: xor a3, a2, a0 -; RV32IA-NEXT: and a3, a3, a5 -; RV32IA-NEXT: xor a3, a2, a3 -; RV32IA-NEXT: sc.w a3, a3, (a4) -; RV32IA-NEXT: bnez a3, .LBB10_1 +; RV32IA-NEXT: xor a4, a3, a2 +; RV32IA-NEXT: and a4, a4, a5 +; RV32IA-NEXT: xor a4, a3, a4 +; RV32IA-NEXT: sc.w a4, a4, (a0) +; RV32IA-NEXT: bnez a4, .LBB10_1 ; RV32IA-NEXT: .LBB10_3: ; RV32IA-NEXT: ret ; @@ -2012,50 +2012,50 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw ; RV64IA-WMO-LABEL: cmpxchg_i16_monotonic_monotonic: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: andi a4, a0, -4 -; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: andi a4, a0, 3 ; RV64IA-WMO-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-NEXT: slli a4, a4, 3 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a5, a3, a4 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: and a2, a2, a3 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w a2, (a4) -; RV64IA-WMO-NEXT: and a3, a2, a5 -; RV64IA-WMO-NEXT: bne a3, a1, .LBB10_3 +; RV64IA-WMO-NEXT: lr.w a3, (a0) +; RV64IA-WMO-NEXT: and a4, a3, a5 +; RV64IA-WMO-NEXT: bne a4, a1, .LBB10_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 -; RV64IA-WMO-NEXT: xor a3, a2, a0 -; RV64IA-WMO-NEXT: and a3, a3, a5 -; RV64IA-WMO-NEXT: xor a3, a2, a3 -; RV64IA-WMO-NEXT: sc.w a3, a3, (a4) -; RV64IA-WMO-NEXT: bnez a3, .LBB10_1 +; RV64IA-WMO-NEXT: xor a4, a3, a2 +; RV64IA-WMO-NEXT: and a4, a4, a5 +; RV64IA-WMO-NEXT: xor a4, a3, a4 +; RV64IA-WMO-NEXT: sc.w a4, a4, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB10_1 ; RV64IA-WMO-NEXT: 
.LBB10_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-ZACAS-LABEL: cmpxchg_i16_monotonic_monotonic: ; RV64IA-ZACAS: # %bb.0: ; RV64IA-ZACAS-NEXT: lui a3, 16 -; RV64IA-ZACAS-NEXT: andi a4, a0, -4 -; RV64IA-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-ZACAS-NEXT: andi a4, a0, 3 ; RV64IA-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-ZACAS-NEXT: slli a4, a4, 3 +; RV64IA-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-ZACAS-NEXT: sllw a5, a3, a4 ; RV64IA-ZACAS-NEXT: and a1, a1, a3 ; RV64IA-ZACAS-NEXT: and a2, a2, a3 -; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-ZACAS-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w a2, (a4) -; RV64IA-ZACAS-NEXT: and a3, a2, a5 -; RV64IA-ZACAS-NEXT: bne a3, a1, .LBB10_3 +; RV64IA-ZACAS-NEXT: lr.w a3, (a0) +; RV64IA-ZACAS-NEXT: and a4, a3, a5 +; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB10_3 ; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 -; RV64IA-ZACAS-NEXT: xor a3, a2, a0 -; RV64IA-ZACAS-NEXT: and a3, a3, a5 -; RV64IA-ZACAS-NEXT: xor a3, a2, a3 -; RV64IA-ZACAS-NEXT: sc.w a3, a3, (a4) -; RV64IA-ZACAS-NEXT: bnez a3, .LBB10_1 +; RV64IA-ZACAS-NEXT: xor a4, a3, a2 +; RV64IA-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-ZACAS-NEXT: sc.w a4, a4, (a0) +; RV64IA-ZACAS-NEXT: bnez a4, .LBB10_1 ; RV64IA-ZACAS-NEXT: .LBB10_3: ; RV64IA-ZACAS-NEXT: ret ; @@ -2067,25 +2067,25 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw ; RV64IA-TSO-LABEL: cmpxchg_i16_monotonic_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: andi a4, a0, -4 -; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: andi a4, a0, 3 ; RV64IA-TSO-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-NEXT: slli a4, a4, 3 +; RV64IA-TSO-NEXT: andi a0, 
a0, -4 +; RV64IA-TSO-NEXT: sllw a5, a3, a4 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: and a2, a2, a3 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w a2, (a4) -; RV64IA-TSO-NEXT: and a3, a2, a5 -; RV64IA-TSO-NEXT: bne a3, a1, .LBB10_3 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: and a4, a3, a5 +; RV64IA-TSO-NEXT: bne a4, a1, .LBB10_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 -; RV64IA-TSO-NEXT: xor a3, a2, a0 -; RV64IA-TSO-NEXT: and a3, a3, a5 -; RV64IA-TSO-NEXT: xor a3, a2, a3 -; RV64IA-TSO-NEXT: sc.w a3, a3, (a4) -; RV64IA-TSO-NEXT: bnez a3, .LBB10_1 +; RV64IA-TSO-NEXT: xor a4, a3, a2 +; RV64IA-TSO-NEXT: and a4, a4, a5 +; RV64IA-TSO-NEXT: xor a4, a3, a4 +; RV64IA-TSO-NEXT: sc.w a4, a4, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB10_1 ; RV64IA-TSO-NEXT: .LBB10_3: ; RV64IA-TSO-NEXT: ret %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic @@ -2109,100 +2109,100 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV32IA-WMO-LABEL: cmpxchg_i16_acquire_monotonic: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: lui a3, 16 -; RV32IA-WMO-NEXT: andi a4, a0, -4 -; RV32IA-WMO-NEXT: andi a0, a0, 3 +; RV32IA-WMO-NEXT: andi a4, a0, 3 ; RV32IA-WMO-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NEXT: sll a5, a3, a0 +; RV32IA-WMO-NEXT: slli a4, a4, 3 +; RV32IA-WMO-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NEXT: sll a5, a3, a4 ; RV32IA-WMO-NEXT: and a1, a1, a3 ; RV32IA-WMO-NEXT: and a2, a2, a3 -; RV32IA-WMO-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: sll a1, a1, a4 +; RV32IA-WMO-NEXT: sll a2, a2, a4 ; RV32IA-WMO-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NEXT: lr.w.aq a2, (a4) -; RV32IA-WMO-NEXT: and a3, a2, a5 -; RV32IA-WMO-NEXT: bne a3, a1, .LBB11_3 +; 
RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: and a4, a3, a5 +; RV32IA-WMO-NEXT: bne a4, a1, .LBB11_3 ; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; RV32IA-WMO-NEXT: xor a3, a2, a0 -; RV32IA-WMO-NEXT: and a3, a3, a5 -; RV32IA-WMO-NEXT: xor a3, a2, a3 -; RV32IA-WMO-NEXT: sc.w a3, a3, (a4) -; RV32IA-WMO-NEXT: bnez a3, .LBB11_1 +; RV32IA-WMO-NEXT: xor a4, a3, a2 +; RV32IA-WMO-NEXT: and a4, a4, a5 +; RV32IA-WMO-NEXT: xor a4, a3, a4 +; RV32IA-WMO-NEXT: sc.w a4, a4, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB11_1 ; RV32IA-WMO-NEXT: .LBB11_3: ; RV32IA-WMO-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic: ; RV32IA-WMO-ZACAS: # %bb.0: ; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: slli a4, a4, 3 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a4 ; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 ; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a3 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-WMO-ZACAS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) -; RV32IA-WMO-ZACAS-NEXT: and a3, a2, a5 -; RV32IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB11_3 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-ZACAS-NEXT: and a4, a3, a5 +; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB11_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a0 -; RV32IA-WMO-ZACAS-NEXT: and a3, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a3 -; RV32IA-WMO-ZACAS-NEXT: sc.w a3, a3, (a4) -; RV32IA-WMO-ZACAS-NEXT: bnez a3, .LBB11_1 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a3, a2 +; 
RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a3, a4 +; RV32IA-WMO-ZACAS-NEXT: sc.w a4, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB11_1 ; RV32IA-WMO-ZACAS-NEXT: .LBB11_3: ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-LABEL: cmpxchg_i16_acquire_monotonic: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: lui a3, 16 -; RV32IA-TSO-NEXT: andi a4, a0, -4 -; RV32IA-TSO-NEXT: andi a0, a0, 3 +; RV32IA-TSO-NEXT: andi a4, a0, 3 ; RV32IA-TSO-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NEXT: sll a5, a3, a0 +; RV32IA-TSO-NEXT: slli a4, a4, 3 +; RV32IA-TSO-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NEXT: sll a5, a3, a4 ; RV32IA-TSO-NEXT: and a1, a1, a3 ; RV32IA-TSO-NEXT: and a2, a2, a3 -; RV32IA-TSO-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: sll a1, a1, a4 +; RV32IA-TSO-NEXT: sll a2, a2, a4 ; RV32IA-TSO-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NEXT: lr.w a2, (a4) -; RV32IA-TSO-NEXT: and a3, a2, a5 -; RV32IA-TSO-NEXT: bne a3, a1, .LBB11_3 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: and a4, a3, a5 +; RV32IA-TSO-NEXT: bne a4, a1, .LBB11_3 ; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; RV32IA-TSO-NEXT: xor a3, a2, a0 -; RV32IA-TSO-NEXT: and a3, a3, a5 -; RV32IA-TSO-NEXT: xor a3, a2, a3 -; RV32IA-TSO-NEXT: sc.w a3, a3, (a4) -; RV32IA-TSO-NEXT: bnez a3, .LBB11_1 +; RV32IA-TSO-NEXT: xor a4, a3, a2 +; RV32IA-TSO-NEXT: and a4, a4, a5 +; RV32IA-TSO-NEXT: xor a4, a3, a4 +; RV32IA-TSO-NEXT: sc.w a4, a4, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB11_1 ; RV32IA-TSO-NEXT: .LBB11_3: ; RV32IA-TSO-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic: ; RV32IA-TSO-ZACAS: # %bb.0: ; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a0 +; 
RV32IA-TSO-ZACAS-NEXT: slli a4, a4, 3 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a4 ; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 ; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a3 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-TSO-ZACAS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4) -; RV32IA-TSO-ZACAS-NEXT: and a3, a2, a5 -; RV32IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB11_3 +; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV32IA-TSO-ZACAS-NEXT: and a4, a3, a5 +; RV32IA-TSO-ZACAS-NEXT: bne a4, a1, .LBB11_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a0 -; RV32IA-TSO-ZACAS-NEXT: and a3, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a3 -; RV32IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4) -; RV32IA-TSO-ZACAS-NEXT: bnez a3, .LBB11_1 +; RV32IA-TSO-ZACAS-NEXT: xor a4, a3, a2 +; RV32IA-TSO-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: xor a4, a3, a4 +; RV32IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a0) +; RV32IA-TSO-ZACAS-NEXT: bnez a4, .LBB11_1 ; RV32IA-TSO-ZACAS-NEXT: .LBB11_3: ; RV32IA-TSO-ZACAS-NEXT: ret ; @@ -2222,50 +2222,50 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV64IA-WMO-LABEL: cmpxchg_i16_acquire_monotonic: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: andi a4, a0, -4 -; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: andi a4, a0, 3 ; RV64IA-WMO-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-NEXT: slli a4, a4, 3 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a5, a3, a4 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: and a2, a2, a3 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a2, a2, a4 ; 
RV64IA-WMO-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w.aq a2, (a4) -; RV64IA-WMO-NEXT: and a3, a2, a5 -; RV64IA-WMO-NEXT: bne a3, a1, .LBB11_3 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: and a4, a3, a5 +; RV64IA-WMO-NEXT: bne a4, a1, .LBB11_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; RV64IA-WMO-NEXT: xor a3, a2, a0 -; RV64IA-WMO-NEXT: and a3, a3, a5 -; RV64IA-WMO-NEXT: xor a3, a2, a3 -; RV64IA-WMO-NEXT: sc.w a3, a3, (a4) -; RV64IA-WMO-NEXT: bnez a3, .LBB11_1 +; RV64IA-WMO-NEXT: xor a4, a3, a2 +; RV64IA-WMO-NEXT: and a4, a4, a5 +; RV64IA-WMO-NEXT: xor a4, a3, a4 +; RV64IA-WMO-NEXT: sc.w a4, a4, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB11_1 ; RV64IA-WMO-NEXT: .LBB11_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic: ; RV64IA-WMO-ZACAS: # %bb.0: ; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: slli a4, a4, 3 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a4 ; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 ; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a3 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-ZACAS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) -; RV64IA-WMO-ZACAS-NEXT: and a3, a2, a5 -; RV64IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB11_3 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-ZACAS-NEXT: and a4, a3, a5 +; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB11_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a0 -; RV64IA-WMO-ZACAS-NEXT: and a3, a3, a5 -; 
RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a3 -; RV64IA-WMO-ZACAS-NEXT: sc.w a3, a3, (a4) -; RV64IA-WMO-ZACAS-NEXT: bnez a3, .LBB11_1 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a3, a2 +; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-WMO-ZACAS-NEXT: sc.w a4, a4, (a0) +; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB11_1 ; RV64IA-WMO-ZACAS-NEXT: .LBB11_3: ; RV64IA-WMO-ZACAS-NEXT: ret ; @@ -2277,50 +2277,50 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV64IA-TSO-LABEL: cmpxchg_i16_acquire_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: andi a4, a0, -4 -; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: andi a4, a0, 3 ; RV64IA-TSO-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-NEXT: slli a4, a4, 3 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a5, a3, a4 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: and a2, a2, a3 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w a2, (a4) -; RV64IA-TSO-NEXT: and a3, a2, a5 -; RV64IA-TSO-NEXT: bne a3, a1, .LBB11_3 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: and a4, a3, a5 +; RV64IA-TSO-NEXT: bne a4, a1, .LBB11_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; RV64IA-TSO-NEXT: xor a3, a2, a0 -; RV64IA-TSO-NEXT: and a3, a3, a5 -; RV64IA-TSO-NEXT: xor a3, a2, a3 -; RV64IA-TSO-NEXT: sc.w a3, a3, (a4) -; RV64IA-TSO-NEXT: bnez a3, .LBB11_1 +; RV64IA-TSO-NEXT: xor a4, a3, a2 +; RV64IA-TSO-NEXT: and a4, a4, a5 +; RV64IA-TSO-NEXT: xor a4, a3, a4 +; RV64IA-TSO-NEXT: sc.w a4, a4, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB11_1 ; RV64IA-TSO-NEXT: .LBB11_3: ; RV64IA-TSO-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic: ; RV64IA-TSO-ZACAS: # %bb.0: ; 
RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: slli a4, a4, 3 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a4 ; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 ; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a3 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-ZACAS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4) -; RV64IA-TSO-ZACAS-NEXT: and a3, a2, a5 -; RV64IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB11_3 +; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV64IA-TSO-ZACAS-NEXT: and a4, a3, a5 +; RV64IA-TSO-ZACAS-NEXT: bne a4, a1, .LBB11_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a0 -; RV64IA-TSO-ZACAS-NEXT: and a3, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a3 -; RV64IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4) -; RV64IA-TSO-ZACAS-NEXT: bnez a3, .LBB11_1 +; RV64IA-TSO-ZACAS-NEXT: xor a4, a3, a2 +; RV64IA-TSO-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a0) +; RV64IA-TSO-ZACAS-NEXT: bnez a4, .LBB11_1 ; RV64IA-TSO-ZACAS-NEXT: .LBB11_3: ; RV64IA-TSO-ZACAS-NEXT: ret ; @@ -2349,100 +2349,100 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV32IA-WMO-LABEL: cmpxchg_i16_acquire_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: lui a3, 16 -; RV32IA-WMO-NEXT: andi a4, a0, -4 -; RV32IA-WMO-NEXT: andi a0, a0, 3 +; RV32IA-WMO-NEXT: andi a4, a0, 3 ; RV32IA-WMO-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NEXT: sll a5, a3, a0 +; RV32IA-WMO-NEXT: slli a4, a4, 3 +; 
RV32IA-WMO-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NEXT: sll a5, a3, a4 ; RV32IA-WMO-NEXT: and a1, a1, a3 ; RV32IA-WMO-NEXT: and a2, a2, a3 -; RV32IA-WMO-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: sll a1, a1, a4 +; RV32IA-WMO-NEXT: sll a2, a2, a4 ; RV32IA-WMO-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NEXT: lr.w.aq a2, (a4) -; RV32IA-WMO-NEXT: and a3, a2, a5 -; RV32IA-WMO-NEXT: bne a3, a1, .LBB12_3 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: and a4, a3, a5 +; RV32IA-WMO-NEXT: bne a4, a1, .LBB12_3 ; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 -; RV32IA-WMO-NEXT: xor a3, a2, a0 -; RV32IA-WMO-NEXT: and a3, a3, a5 -; RV32IA-WMO-NEXT: xor a3, a2, a3 -; RV32IA-WMO-NEXT: sc.w a3, a3, (a4) -; RV32IA-WMO-NEXT: bnez a3, .LBB12_1 +; RV32IA-WMO-NEXT: xor a4, a3, a2 +; RV32IA-WMO-NEXT: and a4, a4, a5 +; RV32IA-WMO-NEXT: xor a4, a3, a4 +; RV32IA-WMO-NEXT: sc.w a4, a4, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB12_1 ; RV32IA-WMO-NEXT: .LBB12_3: ; RV32IA-WMO-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_acquire: ; RV32IA-WMO-ZACAS: # %bb.0: ; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: slli a4, a4, 3 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a4 ; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 ; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a3 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-WMO-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) -; RV32IA-WMO-ZACAS-NEXT: and a3, a2, a5 -; RV32IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB12_3 +; 
RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-ZACAS-NEXT: and a4, a3, a5 +; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB12_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a0 -; RV32IA-WMO-ZACAS-NEXT: and a3, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a3 -; RV32IA-WMO-ZACAS-NEXT: sc.w a3, a3, (a4) -; RV32IA-WMO-ZACAS-NEXT: bnez a3, .LBB12_1 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a3, a2 +; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a3, a4 +; RV32IA-WMO-ZACAS-NEXT: sc.w a4, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB12_1 ; RV32IA-WMO-ZACAS-NEXT: .LBB12_3: ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-LABEL: cmpxchg_i16_acquire_acquire: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: lui a3, 16 -; RV32IA-TSO-NEXT: andi a4, a0, -4 -; RV32IA-TSO-NEXT: andi a0, a0, 3 +; RV32IA-TSO-NEXT: andi a4, a0, 3 ; RV32IA-TSO-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NEXT: sll a5, a3, a0 +; RV32IA-TSO-NEXT: slli a4, a4, 3 +; RV32IA-TSO-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NEXT: sll a5, a3, a4 ; RV32IA-TSO-NEXT: and a1, a1, a3 ; RV32IA-TSO-NEXT: and a2, a2, a3 -; RV32IA-TSO-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: sll a1, a1, a4 +; RV32IA-TSO-NEXT: sll a2, a2, a4 ; RV32IA-TSO-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NEXT: lr.w a2, (a4) -; RV32IA-TSO-NEXT: and a3, a2, a5 -; RV32IA-TSO-NEXT: bne a3, a1, .LBB12_3 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: and a4, a3, a5 +; RV32IA-TSO-NEXT: bne a4, a1, .LBB12_3 ; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 -; RV32IA-TSO-NEXT: xor a3, a2, a0 -; RV32IA-TSO-NEXT: and a3, a3, a5 -; RV32IA-TSO-NEXT: xor a3, a2, a3 -; RV32IA-TSO-NEXT: sc.w a3, a3, (a4) -; RV32IA-TSO-NEXT: bnez a3, .LBB12_1 +; RV32IA-TSO-NEXT: xor a4, a3, a2 +; RV32IA-TSO-NEXT: and a4, a4, a5 +; RV32IA-TSO-NEXT: xor a4, a3, a4 +; RV32IA-TSO-NEXT: sc.w a4, a4, (a0) +; RV32IA-TSO-NEXT: 
bnez a4, .LBB12_1 ; RV32IA-TSO-NEXT: .LBB12_3: ; RV32IA-TSO-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i16_acquire_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: ; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: slli a4, a4, 3 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a4 ; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 ; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a3 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-TSO-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4) -; RV32IA-TSO-ZACAS-NEXT: and a3, a2, a5 -; RV32IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB12_3 +; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV32IA-TSO-ZACAS-NEXT: and a4, a3, a5 +; RV32IA-TSO-ZACAS-NEXT: bne a4, a1, .LBB12_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a0 -; RV32IA-TSO-ZACAS-NEXT: and a3, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a3 -; RV32IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4) -; RV32IA-TSO-ZACAS-NEXT: bnez a3, .LBB12_1 +; RV32IA-TSO-ZACAS-NEXT: xor a4, a3, a2 +; RV32IA-TSO-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: xor a4, a3, a4 +; RV32IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a0) +; RV32IA-TSO-ZACAS-NEXT: bnez a4, .LBB12_1 ; RV32IA-TSO-ZACAS-NEXT: .LBB12_3: ; RV32IA-TSO-ZACAS-NEXT: ret ; @@ -2462,50 +2462,50 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64IA-WMO-LABEL: cmpxchg_i16_acquire_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: andi a4, a0, -4 -; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: andi a4, a0, 
3 ; RV64IA-WMO-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-NEXT: slli a4, a4, 3 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a5, a3, a4 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: and a2, a2, a3 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w.aq a2, (a4) -; RV64IA-WMO-NEXT: and a3, a2, a5 -; RV64IA-WMO-NEXT: bne a3, a1, .LBB12_3 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: and a4, a3, a5 +; RV64IA-WMO-NEXT: bne a4, a1, .LBB12_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 -; RV64IA-WMO-NEXT: xor a3, a2, a0 -; RV64IA-WMO-NEXT: and a3, a3, a5 -; RV64IA-WMO-NEXT: xor a3, a2, a3 -; RV64IA-WMO-NEXT: sc.w a3, a3, (a4) -; RV64IA-WMO-NEXT: bnez a3, .LBB12_1 +; RV64IA-WMO-NEXT: xor a4, a3, a2 +; RV64IA-WMO-NEXT: and a4, a4, a5 +; RV64IA-WMO-NEXT: xor a4, a3, a4 +; RV64IA-WMO-NEXT: sc.w a4, a4, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB12_1 ; RV64IA-WMO-NEXT: .LBB12_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_acquire: ; RV64IA-WMO-ZACAS: # %bb.0: ; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: slli a4, a4, 3 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a4 ; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 ; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a3 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: 
Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) -; RV64IA-WMO-ZACAS-NEXT: and a3, a2, a5 -; RV64IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB12_3 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-ZACAS-NEXT: and a4, a3, a5 +; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB12_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a0 -; RV64IA-WMO-ZACAS-NEXT: and a3, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a3 -; RV64IA-WMO-ZACAS-NEXT: sc.w a3, a3, (a4) -; RV64IA-WMO-ZACAS-NEXT: bnez a3, .LBB12_1 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a3, a2 +; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-WMO-ZACAS-NEXT: sc.w a4, a4, (a0) +; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB12_1 ; RV64IA-WMO-ZACAS-NEXT: .LBB12_3: ; RV64IA-WMO-ZACAS-NEXT: ret ; @@ -2517,50 +2517,50 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64IA-TSO-LABEL: cmpxchg_i16_acquire_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: andi a4, a0, -4 -; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: andi a4, a0, 3 ; RV64IA-TSO-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-NEXT: slli a4, a4, 3 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a5, a3, a4 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: and a2, a2, a3 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w a2, (a4) -; RV64IA-TSO-NEXT: and a3, a2, a5 -; RV64IA-TSO-NEXT: bne a3, a1, .LBB12_3 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: and a4, a3, a5 +; RV64IA-TSO-NEXT: bne a4, a1, .LBB12_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 -; RV64IA-TSO-NEXT: xor a3, a2, a0 -; RV64IA-TSO-NEXT: and a3, a3, a5 -; RV64IA-TSO-NEXT: 
xor a3, a2, a3 -; RV64IA-TSO-NEXT: sc.w a3, a3, (a4) -; RV64IA-TSO-NEXT: bnez a3, .LBB12_1 +; RV64IA-TSO-NEXT: xor a4, a3, a2 +; RV64IA-TSO-NEXT: and a4, a4, a5 +; RV64IA-TSO-NEXT: xor a4, a3, a4 +; RV64IA-TSO-NEXT: sc.w a4, a4, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB12_1 ; RV64IA-TSO-NEXT: .LBB12_3: ; RV64IA-TSO-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i16_acquire_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: ; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: slli a4, a4, 3 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a4 ; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 ; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a3 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4) -; RV64IA-TSO-ZACAS-NEXT: and a3, a2, a5 -; RV64IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB12_3 +; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV64IA-TSO-ZACAS-NEXT: and a4, a3, a5 +; RV64IA-TSO-ZACAS-NEXT: bne a4, a1, .LBB12_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a0 -; RV64IA-TSO-ZACAS-NEXT: and a3, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a3 -; RV64IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4) -; RV64IA-TSO-ZACAS-NEXT: bnez a3, .LBB12_1 +; RV64IA-TSO-ZACAS-NEXT: xor a4, a3, a2 +; RV64IA-TSO-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a0) +; RV64IA-TSO-ZACAS-NEXT: bnez a4, .LBB12_1 ; RV64IA-TSO-ZACAS-NEXT: .LBB12_3: ; RV64IA-TSO-ZACAS-NEXT: ret ; @@ -2589,100 +2589,100 @@ define void 
@cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV32IA-WMO-LABEL: cmpxchg_i16_release_monotonic: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: lui a3, 16 -; RV32IA-WMO-NEXT: andi a4, a0, -4 -; RV32IA-WMO-NEXT: andi a0, a0, 3 +; RV32IA-WMO-NEXT: andi a4, a0, 3 ; RV32IA-WMO-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NEXT: sll a5, a3, a0 +; RV32IA-WMO-NEXT: slli a4, a4, 3 +; RV32IA-WMO-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NEXT: sll a5, a3, a4 ; RV32IA-WMO-NEXT: and a1, a1, a3 ; RV32IA-WMO-NEXT: and a2, a2, a3 -; RV32IA-WMO-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: sll a1, a1, a4 +; RV32IA-WMO-NEXT: sll a2, a2, a4 ; RV32IA-WMO-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NEXT: lr.w a2, (a4) -; RV32IA-WMO-NEXT: and a3, a2, a5 -; RV32IA-WMO-NEXT: bne a3, a1, .LBB13_3 +; RV32IA-WMO-NEXT: lr.w a3, (a0) +; RV32IA-WMO-NEXT: and a4, a3, a5 +; RV32IA-WMO-NEXT: bne a4, a1, .LBB13_3 ; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 -; RV32IA-WMO-NEXT: xor a3, a2, a0 -; RV32IA-WMO-NEXT: and a3, a3, a5 -; RV32IA-WMO-NEXT: xor a3, a2, a3 -; RV32IA-WMO-NEXT: sc.w.rl a3, a3, (a4) -; RV32IA-WMO-NEXT: bnez a3, .LBB13_1 +; RV32IA-WMO-NEXT: xor a4, a3, a2 +; RV32IA-WMO-NEXT: and a4, a4, a5 +; RV32IA-WMO-NEXT: xor a4, a3, a4 +; RV32IA-WMO-NEXT: sc.w.rl a4, a4, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB13_1 ; RV32IA-WMO-NEXT: .LBB13_3: ; RV32IA-WMO-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_monotonic: ; RV32IA-WMO-ZACAS: # %bb.0: ; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: slli a4, a4, 3 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a4 ; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 
; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a3 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-WMO-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w a2, (a4) -; RV32IA-WMO-ZACAS-NEXT: and a3, a2, a5 -; RV32IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB13_3 +; RV32IA-WMO-ZACAS-NEXT: lr.w a3, (a0) +; RV32IA-WMO-ZACAS-NEXT: and a4, a3, a5 +; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB13_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a0 -; RV32IA-WMO-ZACAS-NEXT: and a3, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a3 -; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a3, a3, (a4) -; RV32IA-WMO-ZACAS-NEXT: bnez a3, .LBB13_1 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a3, a2 +; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a3, a4 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB13_1 ; RV32IA-WMO-ZACAS-NEXT: .LBB13_3: ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-LABEL: cmpxchg_i16_release_monotonic: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: lui a3, 16 -; RV32IA-TSO-NEXT: andi a4, a0, -4 -; RV32IA-TSO-NEXT: andi a0, a0, 3 +; RV32IA-TSO-NEXT: andi a4, a0, 3 ; RV32IA-TSO-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NEXT: sll a5, a3, a0 +; RV32IA-TSO-NEXT: slli a4, a4, 3 +; RV32IA-TSO-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NEXT: sll a5, a3, a4 ; RV32IA-TSO-NEXT: and a1, a1, a3 ; RV32IA-TSO-NEXT: and a2, a2, a3 -; RV32IA-TSO-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: sll a1, a1, a4 +; RV32IA-TSO-NEXT: sll a2, a2, a4 ; RV32IA-TSO-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NEXT: lr.w a2, (a4) -; RV32IA-TSO-NEXT: and a3, a2, a5 -; RV32IA-TSO-NEXT: bne a3, a1, .LBB13_3 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: and a4, a3, a5 +; RV32IA-TSO-NEXT: bne a4, a1, 
.LBB13_3 ; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 -; RV32IA-TSO-NEXT: xor a3, a2, a0 -; RV32IA-TSO-NEXT: and a3, a3, a5 -; RV32IA-TSO-NEXT: xor a3, a2, a3 -; RV32IA-TSO-NEXT: sc.w a3, a3, (a4) -; RV32IA-TSO-NEXT: bnez a3, .LBB13_1 +; RV32IA-TSO-NEXT: xor a4, a3, a2 +; RV32IA-TSO-NEXT: and a4, a4, a5 +; RV32IA-TSO-NEXT: xor a4, a3, a4 +; RV32IA-TSO-NEXT: sc.w a4, a4, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB13_1 ; RV32IA-TSO-NEXT: .LBB13_3: ; RV32IA-TSO-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i16_release_monotonic: ; RV32IA-TSO-ZACAS: # %bb.0: ; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: slli a4, a4, 3 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a4 ; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 ; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a3 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-TSO-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4) -; RV32IA-TSO-ZACAS-NEXT: and a3, a2, a5 -; RV32IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB13_3 +; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV32IA-TSO-ZACAS-NEXT: and a4, a3, a5 +; RV32IA-TSO-ZACAS-NEXT: bne a4, a1, .LBB13_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a0 -; RV32IA-TSO-ZACAS-NEXT: and a3, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a3 -; RV32IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4) -; RV32IA-TSO-ZACAS-NEXT: bnez a3, .LBB13_1 +; RV32IA-TSO-ZACAS-NEXT: xor a4, a3, a2 +; RV32IA-TSO-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: xor a4, a3, a4 +; RV32IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a0) +; 
RV32IA-TSO-ZACAS-NEXT: bnez a4, .LBB13_1 ; RV32IA-TSO-ZACAS-NEXT: .LBB13_3: ; RV32IA-TSO-ZACAS-NEXT: ret ; @@ -2702,50 +2702,50 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV64IA-WMO-LABEL: cmpxchg_i16_release_monotonic: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: andi a4, a0, -4 -; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: andi a4, a0, 3 ; RV64IA-WMO-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-NEXT: slli a4, a4, 3 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a5, a3, a4 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: and a2, a2, a3 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w a2, (a4) -; RV64IA-WMO-NEXT: and a3, a2, a5 -; RV64IA-WMO-NEXT: bne a3, a1, .LBB13_3 +; RV64IA-WMO-NEXT: lr.w a3, (a0) +; RV64IA-WMO-NEXT: and a4, a3, a5 +; RV64IA-WMO-NEXT: bne a4, a1, .LBB13_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 -; RV64IA-WMO-NEXT: xor a3, a2, a0 -; RV64IA-WMO-NEXT: and a3, a3, a5 -; RV64IA-WMO-NEXT: xor a3, a2, a3 -; RV64IA-WMO-NEXT: sc.w.rl a3, a3, (a4) -; RV64IA-WMO-NEXT: bnez a3, .LBB13_1 +; RV64IA-WMO-NEXT: xor a4, a3, a2 +; RV64IA-WMO-NEXT: and a4, a4, a5 +; RV64IA-WMO-NEXT: xor a4, a3, a4 +; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB13_1 ; RV64IA-WMO-NEXT: .LBB13_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_monotonic: ; RV64IA-WMO-ZACAS: # %bb.0: ; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a0 +; 
RV64IA-WMO-ZACAS-NEXT: slli a4, a4, 3 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a4 ; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 ; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a3 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w a2, (a4) -; RV64IA-WMO-ZACAS-NEXT: and a3, a2, a5 -; RV64IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB13_3 +; RV64IA-WMO-ZACAS-NEXT: lr.w a3, (a0) +; RV64IA-WMO-ZACAS-NEXT: and a4, a3, a5 +; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB13_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a0 -; RV64IA-WMO-ZACAS-NEXT: and a3, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a3 -; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a3, a3, (a4) -; RV64IA-WMO-ZACAS-NEXT: bnez a3, .LBB13_1 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a3, a2 +; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a0) +; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB13_1 ; RV64IA-WMO-ZACAS-NEXT: .LBB13_3: ; RV64IA-WMO-ZACAS-NEXT: ret ; @@ -2757,50 +2757,50 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV64IA-TSO-LABEL: cmpxchg_i16_release_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: andi a4, a0, -4 -; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: andi a4, a0, 3 ; RV64IA-TSO-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-NEXT: slli a4, a4, 3 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a5, a3, a4 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: and a2, a2, a3 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a2, a2, a4 ; 
RV64IA-TSO-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w a2, (a4) -; RV64IA-TSO-NEXT: and a3, a2, a5 -; RV64IA-TSO-NEXT: bne a3, a1, .LBB13_3 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: and a4, a3, a5 +; RV64IA-TSO-NEXT: bne a4, a1, .LBB13_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 -; RV64IA-TSO-NEXT: xor a3, a2, a0 -; RV64IA-TSO-NEXT: and a3, a3, a5 -; RV64IA-TSO-NEXT: xor a3, a2, a3 -; RV64IA-TSO-NEXT: sc.w a3, a3, (a4) -; RV64IA-TSO-NEXT: bnez a3, .LBB13_1 +; RV64IA-TSO-NEXT: xor a4, a3, a2 +; RV64IA-TSO-NEXT: and a4, a4, a5 +; RV64IA-TSO-NEXT: xor a4, a3, a4 +; RV64IA-TSO-NEXT: sc.w a4, a4, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB13_1 ; RV64IA-TSO-NEXT: .LBB13_3: ; RV64IA-TSO-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i16_release_monotonic: ; RV64IA-TSO-ZACAS: # %bb.0: ; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: slli a4, a4, 3 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a4 ; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 ; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a3 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4) -; RV64IA-TSO-ZACAS-NEXT: and a3, a2, a5 -; RV64IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB13_3 +; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV64IA-TSO-ZACAS-NEXT: and a4, a3, a5 +; RV64IA-TSO-ZACAS-NEXT: bne a4, a1, .LBB13_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a0 -; RV64IA-TSO-ZACAS-NEXT: and a3, a3, a5 -; 
RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a3 -; RV64IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4) -; RV64IA-TSO-ZACAS-NEXT: bnez a3, .LBB13_1 +; RV64IA-TSO-ZACAS-NEXT: xor a4, a3, a2 +; RV64IA-TSO-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a0) +; RV64IA-TSO-ZACAS-NEXT: bnez a4, .LBB13_1 ; RV64IA-TSO-ZACAS-NEXT: .LBB13_3: ; RV64IA-TSO-ZACAS-NEXT: ret ; @@ -2829,100 +2829,100 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV32IA-WMO-LABEL: cmpxchg_i16_release_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: lui a3, 16 -; RV32IA-WMO-NEXT: andi a4, a0, -4 -; RV32IA-WMO-NEXT: andi a0, a0, 3 +; RV32IA-WMO-NEXT: andi a4, a0, 3 ; RV32IA-WMO-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NEXT: sll a5, a3, a0 +; RV32IA-WMO-NEXT: slli a4, a4, 3 +; RV32IA-WMO-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NEXT: sll a5, a3, a4 ; RV32IA-WMO-NEXT: and a1, a1, a3 ; RV32IA-WMO-NEXT: and a2, a2, a3 -; RV32IA-WMO-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: sll a1, a1, a4 +; RV32IA-WMO-NEXT: sll a2, a2, a4 ; RV32IA-WMO-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NEXT: lr.w.aq a2, (a4) -; RV32IA-WMO-NEXT: and a3, a2, a5 -; RV32IA-WMO-NEXT: bne a3, a1, .LBB14_3 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: and a4, a3, a5 +; RV32IA-WMO-NEXT: bne a4, a1, .LBB14_3 ; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 -; RV32IA-WMO-NEXT: xor a3, a2, a0 -; RV32IA-WMO-NEXT: and a3, a3, a5 -; RV32IA-WMO-NEXT: xor a3, a2, a3 -; RV32IA-WMO-NEXT: sc.w.rl a3, a3, (a4) -; RV32IA-WMO-NEXT: bnez a3, .LBB14_1 +; RV32IA-WMO-NEXT: xor a4, a3, a2 +; RV32IA-WMO-NEXT: and a4, a4, a5 +; RV32IA-WMO-NEXT: xor a4, a3, a4 +; RV32IA-WMO-NEXT: sc.w.rl a4, a4, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB14_1 ; RV32IA-WMO-NEXT: .LBB14_3: ; RV32IA-WMO-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_acquire: ; RV32IA-WMO-ZACAS: # %bb.0: ; 
RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: slli a4, a4, 3 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a4 ; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 ; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a3 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-WMO-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) -; RV32IA-WMO-ZACAS-NEXT: and a3, a2, a5 -; RV32IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB14_3 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-ZACAS-NEXT: and a4, a3, a5 +; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB14_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a0 -; RV32IA-WMO-ZACAS-NEXT: and a3, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a3 -; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a3, a3, (a4) -; RV32IA-WMO-ZACAS-NEXT: bnez a3, .LBB14_1 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a3, a2 +; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a3, a4 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB14_1 ; RV32IA-WMO-ZACAS-NEXT: .LBB14_3: ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-LABEL: cmpxchg_i16_release_acquire: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: lui a3, 16 -; RV32IA-TSO-NEXT: andi a4, a0, -4 -; RV32IA-TSO-NEXT: andi a0, a0, 3 +; RV32IA-TSO-NEXT: andi a4, a0, 3 ; RV32IA-TSO-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NEXT: sll a5, a3, a0 +; RV32IA-TSO-NEXT: slli a4, a4, 3 +; RV32IA-TSO-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NEXT: sll a5, a3, a4 ; RV32IA-TSO-NEXT: and a1, a1, a3 ; 
RV32IA-TSO-NEXT: and a2, a2, a3 -; RV32IA-TSO-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: sll a1, a1, a4 +; RV32IA-TSO-NEXT: sll a2, a2, a4 ; RV32IA-TSO-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NEXT: lr.w a2, (a4) -; RV32IA-TSO-NEXT: and a3, a2, a5 -; RV32IA-TSO-NEXT: bne a3, a1, .LBB14_3 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: and a4, a3, a5 +; RV32IA-TSO-NEXT: bne a4, a1, .LBB14_3 ; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 -; RV32IA-TSO-NEXT: xor a3, a2, a0 -; RV32IA-TSO-NEXT: and a3, a3, a5 -; RV32IA-TSO-NEXT: xor a3, a2, a3 -; RV32IA-TSO-NEXT: sc.w a3, a3, (a4) -; RV32IA-TSO-NEXT: bnez a3, .LBB14_1 +; RV32IA-TSO-NEXT: xor a4, a3, a2 +; RV32IA-TSO-NEXT: and a4, a4, a5 +; RV32IA-TSO-NEXT: xor a4, a3, a4 +; RV32IA-TSO-NEXT: sc.w a4, a4, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB14_1 ; RV32IA-TSO-NEXT: .LBB14_3: ; RV32IA-TSO-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i16_release_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: ; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: slli a4, a4, 3 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a4 ; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 ; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a3 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-TSO-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4) -; RV32IA-TSO-ZACAS-NEXT: and a3, a2, a5 -; RV32IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB14_3 +; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV32IA-TSO-ZACAS-NEXT: and a4, a3, a5 +; RV32IA-TSO-ZACAS-NEXT: bne a4, a1, .LBB14_3 ; 
RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a0 -; RV32IA-TSO-ZACAS-NEXT: and a3, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a3 -; RV32IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4) -; RV32IA-TSO-ZACAS-NEXT: bnez a3, .LBB14_1 +; RV32IA-TSO-ZACAS-NEXT: xor a4, a3, a2 +; RV32IA-TSO-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: xor a4, a3, a4 +; RV32IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a0) +; RV32IA-TSO-ZACAS-NEXT: bnez a4, .LBB14_1 ; RV32IA-TSO-ZACAS-NEXT: .LBB14_3: ; RV32IA-TSO-ZACAS-NEXT: ret ; @@ -2942,50 +2942,50 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64IA-WMO-LABEL: cmpxchg_i16_release_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: andi a4, a0, -4 -; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: andi a4, a0, 3 ; RV64IA-WMO-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-NEXT: slli a4, a4, 3 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a5, a3, a4 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: and a2, a2, a3 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w.aq a2, (a4) -; RV64IA-WMO-NEXT: and a3, a2, a5 -; RV64IA-WMO-NEXT: bne a3, a1, .LBB14_3 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: and a4, a3, a5 +; RV64IA-WMO-NEXT: bne a4, a1, .LBB14_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 -; RV64IA-WMO-NEXT: xor a3, a2, a0 -; RV64IA-WMO-NEXT: and a3, a3, a5 -; RV64IA-WMO-NEXT: xor a3, a2, a3 -; RV64IA-WMO-NEXT: sc.w.rl a3, a3, (a4) -; RV64IA-WMO-NEXT: bnez a3, .LBB14_1 +; RV64IA-WMO-NEXT: xor a4, a3, a2 +; RV64IA-WMO-NEXT: and a4, a4, a5 +; RV64IA-WMO-NEXT: xor a4, a3, a4 +; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a0) +; RV64IA-WMO-NEXT: bnez 
a4, .LBB14_1 ; RV64IA-WMO-NEXT: .LBB14_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_acquire: ; RV64IA-WMO-ZACAS: # %bb.0: ; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: slli a4, a4, 3 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a4 ; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 ; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a3 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) -; RV64IA-WMO-ZACAS-NEXT: and a3, a2, a5 -; RV64IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB14_3 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-ZACAS-NEXT: and a4, a3, a5 +; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB14_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a0 -; RV64IA-WMO-ZACAS-NEXT: and a3, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a3 -; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a3, a3, (a4) -; RV64IA-WMO-ZACAS-NEXT: bnez a3, .LBB14_1 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a3, a2 +; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a0) +; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB14_1 ; RV64IA-WMO-ZACAS-NEXT: .LBB14_3: ; RV64IA-WMO-ZACAS-NEXT: ret ; @@ -2997,50 +2997,50 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64IA-TSO-LABEL: cmpxchg_i16_release_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: andi a4, a0, -4 -; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: 
andi a4, a0, 3 ; RV64IA-TSO-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-NEXT: slli a4, a4, 3 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a5, a3, a4 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: and a2, a2, a3 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w a2, (a4) -; RV64IA-TSO-NEXT: and a3, a2, a5 -; RV64IA-TSO-NEXT: bne a3, a1, .LBB14_3 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: and a4, a3, a5 +; RV64IA-TSO-NEXT: bne a4, a1, .LBB14_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 -; RV64IA-TSO-NEXT: xor a3, a2, a0 -; RV64IA-TSO-NEXT: and a3, a3, a5 -; RV64IA-TSO-NEXT: xor a3, a2, a3 -; RV64IA-TSO-NEXT: sc.w a3, a3, (a4) -; RV64IA-TSO-NEXT: bnez a3, .LBB14_1 +; RV64IA-TSO-NEXT: xor a4, a3, a2 +; RV64IA-TSO-NEXT: and a4, a4, a5 +; RV64IA-TSO-NEXT: xor a4, a3, a4 +; RV64IA-TSO-NEXT: sc.w a4, a4, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB14_1 ; RV64IA-TSO-NEXT: .LBB14_3: ; RV64IA-TSO-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i16_release_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: ; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: slli a4, a4, 3 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a4 ; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 ; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a3 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop 
Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4) -; RV64IA-TSO-ZACAS-NEXT: and a3, a2, a5 -; RV64IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB14_3 +; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV64IA-TSO-ZACAS-NEXT: and a4, a3, a5 +; RV64IA-TSO-ZACAS-NEXT: bne a4, a1, .LBB14_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a0 -; RV64IA-TSO-ZACAS-NEXT: and a3, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a3 -; RV64IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4) -; RV64IA-TSO-ZACAS-NEXT: bnez a3, .LBB14_1 +; RV64IA-TSO-ZACAS-NEXT: xor a4, a3, a2 +; RV64IA-TSO-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a0) +; RV64IA-TSO-ZACAS-NEXT: bnez a4, .LBB14_1 ; RV64IA-TSO-ZACAS-NEXT: .LBB14_3: ; RV64IA-TSO-ZACAS-NEXT: ret ; @@ -3069,100 +3069,100 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV32IA-WMO-LABEL: cmpxchg_i16_acq_rel_monotonic: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: lui a3, 16 -; RV32IA-WMO-NEXT: andi a4, a0, -4 -; RV32IA-WMO-NEXT: andi a0, a0, 3 +; RV32IA-WMO-NEXT: andi a4, a0, 3 ; RV32IA-WMO-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NEXT: sll a5, a3, a0 +; RV32IA-WMO-NEXT: slli a4, a4, 3 +; RV32IA-WMO-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NEXT: sll a5, a3, a4 ; RV32IA-WMO-NEXT: and a1, a1, a3 ; RV32IA-WMO-NEXT: and a2, a2, a3 -; RV32IA-WMO-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: sll a1, a1, a4 +; RV32IA-WMO-NEXT: sll a2, a2, a4 ; RV32IA-WMO-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NEXT: lr.w.aq a2, (a4) -; RV32IA-WMO-NEXT: and a3, a2, a5 -; RV32IA-WMO-NEXT: bne a3, a1, .LBB15_3 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: and a4, a3, a5 +; RV32IA-WMO-NEXT: bne a4, a1, .LBB15_3 ; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; RV32IA-WMO-NEXT: xor a3, a2, a0 -; RV32IA-WMO-NEXT: and a3, a3, a5 -; 
RV32IA-WMO-NEXT: xor a3, a2, a3 -; RV32IA-WMO-NEXT: sc.w.rl a3, a3, (a4) -; RV32IA-WMO-NEXT: bnez a3, .LBB15_1 +; RV32IA-WMO-NEXT: xor a4, a3, a2 +; RV32IA-WMO-NEXT: and a4, a4, a5 +; RV32IA-WMO-NEXT: xor a4, a3, a4 +; RV32IA-WMO-NEXT: sc.w.rl a4, a4, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB15_1 ; RV32IA-WMO-NEXT: .LBB15_3: ; RV32IA-WMO-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic: ; RV32IA-WMO-ZACAS: # %bb.0: ; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: slli a4, a4, 3 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a4 ; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 ; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a3 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-WMO-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) -; RV32IA-WMO-ZACAS-NEXT: and a3, a2, a5 -; RV32IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB15_3 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-ZACAS-NEXT: and a4, a3, a5 +; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB15_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a0 -; RV32IA-WMO-ZACAS-NEXT: and a3, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a3 -; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a3, a3, (a4) -; RV32IA-WMO-ZACAS-NEXT: bnez a3, .LBB15_1 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a3, a2 +; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a3, a4 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB15_1 ; RV32IA-WMO-ZACAS-NEXT: .LBB15_3: ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-LABEL: 
cmpxchg_i16_acq_rel_monotonic: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: lui a3, 16 -; RV32IA-TSO-NEXT: andi a4, a0, -4 -; RV32IA-TSO-NEXT: andi a0, a0, 3 +; RV32IA-TSO-NEXT: andi a4, a0, 3 ; RV32IA-TSO-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NEXT: sll a5, a3, a0 +; RV32IA-TSO-NEXT: slli a4, a4, 3 +; RV32IA-TSO-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NEXT: sll a5, a3, a4 ; RV32IA-TSO-NEXT: and a1, a1, a3 ; RV32IA-TSO-NEXT: and a2, a2, a3 -; RV32IA-TSO-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: sll a1, a1, a4 +; RV32IA-TSO-NEXT: sll a2, a2, a4 ; RV32IA-TSO-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NEXT: lr.w a2, (a4) -; RV32IA-TSO-NEXT: and a3, a2, a5 -; RV32IA-TSO-NEXT: bne a3, a1, .LBB15_3 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: and a4, a3, a5 +; RV32IA-TSO-NEXT: bne a4, a1, .LBB15_3 ; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; RV32IA-TSO-NEXT: xor a3, a2, a0 -; RV32IA-TSO-NEXT: and a3, a3, a5 -; RV32IA-TSO-NEXT: xor a3, a2, a3 -; RV32IA-TSO-NEXT: sc.w a3, a3, (a4) -; RV32IA-TSO-NEXT: bnez a3, .LBB15_1 +; RV32IA-TSO-NEXT: xor a4, a3, a2 +; RV32IA-TSO-NEXT: and a4, a4, a5 +; RV32IA-TSO-NEXT: xor a4, a3, a4 +; RV32IA-TSO-NEXT: sc.w a4, a4, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB15_1 ; RV32IA-TSO-NEXT: .LBB15_3: ; RV32IA-TSO-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic: ; RV32IA-TSO-ZACAS: # %bb.0: ; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: slli a4, a4, 3 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a4 ; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 ; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a3 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; 
RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-TSO-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4) -; RV32IA-TSO-ZACAS-NEXT: and a3, a2, a5 -; RV32IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB15_3 +; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV32IA-TSO-ZACAS-NEXT: and a4, a3, a5 +; RV32IA-TSO-ZACAS-NEXT: bne a4, a1, .LBB15_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a0 -; RV32IA-TSO-ZACAS-NEXT: and a3, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a3 -; RV32IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4) -; RV32IA-TSO-ZACAS-NEXT: bnez a3, .LBB15_1 +; RV32IA-TSO-ZACAS-NEXT: xor a4, a3, a2 +; RV32IA-TSO-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: xor a4, a3, a4 +; RV32IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a0) +; RV32IA-TSO-ZACAS-NEXT: bnez a4, .LBB15_1 ; RV32IA-TSO-ZACAS-NEXT: .LBB15_3: ; RV32IA-TSO-ZACAS-NEXT: ret ; @@ -3182,50 +3182,50 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV64IA-WMO-LABEL: cmpxchg_i16_acq_rel_monotonic: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: andi a4, a0, -4 -; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: andi a4, a0, 3 ; RV64IA-WMO-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-NEXT: slli a4, a4, 3 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a5, a3, a4 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: and a2, a2, a3 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w.aq a2, (a4) -; RV64IA-WMO-NEXT: and a3, a2, a5 -; RV64IA-WMO-NEXT: bne a3, a1, .LBB15_3 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: and a4, a3, a5 +; 
RV64IA-WMO-NEXT: bne a4, a1, .LBB15_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; RV64IA-WMO-NEXT: xor a3, a2, a0 -; RV64IA-WMO-NEXT: and a3, a3, a5 -; RV64IA-WMO-NEXT: xor a3, a2, a3 -; RV64IA-WMO-NEXT: sc.w.rl a3, a3, (a4) -; RV64IA-WMO-NEXT: bnez a3, .LBB15_1 +; RV64IA-WMO-NEXT: xor a4, a3, a2 +; RV64IA-WMO-NEXT: and a4, a4, a5 +; RV64IA-WMO-NEXT: xor a4, a3, a4 +; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB15_1 ; RV64IA-WMO-NEXT: .LBB15_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic: ; RV64IA-WMO-ZACAS: # %bb.0: ; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: slli a4, a4, 3 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a4 ; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 ; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a3 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) -; RV64IA-WMO-ZACAS-NEXT: and a3, a2, a5 -; RV64IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB15_3 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-ZACAS-NEXT: and a4, a3, a5 +; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB15_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a0 -; RV64IA-WMO-ZACAS-NEXT: and a3, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a3 -; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a3, a3, (a4) -; RV64IA-WMO-ZACAS-NEXT: bnez a3, .LBB15_1 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a3, a2 +; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a3, 
a4 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a0) +; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB15_1 ; RV64IA-WMO-ZACAS-NEXT: .LBB15_3: ; RV64IA-WMO-ZACAS-NEXT: ret ; @@ -3237,50 +3237,50 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV64IA-TSO-LABEL: cmpxchg_i16_acq_rel_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: andi a4, a0, -4 -; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: andi a4, a0, 3 ; RV64IA-TSO-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-NEXT: slli a4, a4, 3 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a5, a3, a4 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: and a2, a2, a3 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w a2, (a4) -; RV64IA-TSO-NEXT: and a3, a2, a5 -; RV64IA-TSO-NEXT: bne a3, a1, .LBB15_3 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: and a4, a3, a5 +; RV64IA-TSO-NEXT: bne a4, a1, .LBB15_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; RV64IA-TSO-NEXT: xor a3, a2, a0 -; RV64IA-TSO-NEXT: and a3, a3, a5 -; RV64IA-TSO-NEXT: xor a3, a2, a3 -; RV64IA-TSO-NEXT: sc.w a3, a3, (a4) -; RV64IA-TSO-NEXT: bnez a3, .LBB15_1 +; RV64IA-TSO-NEXT: xor a4, a3, a2 +; RV64IA-TSO-NEXT: and a4, a4, a5 +; RV64IA-TSO-NEXT: xor a4, a3, a4 +; RV64IA-TSO-NEXT: sc.w a4, a4, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB15_1 ; RV64IA-TSO-NEXT: .LBB15_3: ; RV64IA-TSO-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic: ; RV64IA-TSO-ZACAS: # %bb.0: ; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; 
RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: slli a4, a4, 3 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a4 ; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 ; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a3 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4) -; RV64IA-TSO-ZACAS-NEXT: and a3, a2, a5 -; RV64IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB15_3 +; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV64IA-TSO-ZACAS-NEXT: and a4, a3, a5 +; RV64IA-TSO-ZACAS-NEXT: bne a4, a1, .LBB15_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a0 -; RV64IA-TSO-ZACAS-NEXT: and a3, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a3 -; RV64IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4) -; RV64IA-TSO-ZACAS-NEXT: bnez a3, .LBB15_1 +; RV64IA-TSO-ZACAS-NEXT: xor a4, a3, a2 +; RV64IA-TSO-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a0) +; RV64IA-TSO-ZACAS-NEXT: bnez a4, .LBB15_1 ; RV64IA-TSO-ZACAS-NEXT: .LBB15_3: ; RV64IA-TSO-ZACAS-NEXT: ret ; @@ -3309,100 +3309,100 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV32IA-WMO-LABEL: cmpxchg_i16_acq_rel_acquire: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: lui a3, 16 -; RV32IA-WMO-NEXT: andi a4, a0, -4 -; RV32IA-WMO-NEXT: andi a0, a0, 3 +; RV32IA-WMO-NEXT: andi a4, a0, 3 ; RV32IA-WMO-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NEXT: sll a5, a3, a0 +; RV32IA-WMO-NEXT: slli a4, a4, 3 +; RV32IA-WMO-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NEXT: sll a5, a3, a4 ; RV32IA-WMO-NEXT: and a1, a1, a3 ; RV32IA-WMO-NEXT: and a2, a2, a3 -; RV32IA-WMO-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: sll a1, a1, a4 +; 
RV32IA-WMO-NEXT: sll a2, a2, a4 ; RV32IA-WMO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NEXT: lr.w.aq a2, (a4) -; RV32IA-WMO-NEXT: and a3, a2, a5 -; RV32IA-WMO-NEXT: bne a3, a1, .LBB16_3 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: and a4, a3, a5 +; RV32IA-WMO-NEXT: bne a4, a1, .LBB16_3 ; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 -; RV32IA-WMO-NEXT: xor a3, a2, a0 -; RV32IA-WMO-NEXT: and a3, a3, a5 -; RV32IA-WMO-NEXT: xor a3, a2, a3 -; RV32IA-WMO-NEXT: sc.w.rl a3, a3, (a4) -; RV32IA-WMO-NEXT: bnez a3, .LBB16_1 +; RV32IA-WMO-NEXT: xor a4, a3, a2 +; RV32IA-WMO-NEXT: and a4, a4, a5 +; RV32IA-WMO-NEXT: xor a4, a3, a4 +; RV32IA-WMO-NEXT: sc.w.rl a4, a4, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB16_1 ; RV32IA-WMO-NEXT: .LBB16_3: ; RV32IA-WMO-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire: ; RV32IA-WMO-ZACAS: # %bb.0: ; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: slli a4, a4, 3 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a4 ; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 ; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a3 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-WMO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) -; RV32IA-WMO-ZACAS-NEXT: and a3, a2, a5 -; RV32IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB16_3 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-ZACAS-NEXT: and a4, a3, a5 +; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB16_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a0 -; 
RV32IA-WMO-ZACAS-NEXT: and a3, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a3 -; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a3, a3, (a4) -; RV32IA-WMO-ZACAS-NEXT: bnez a3, .LBB16_1 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a3, a2 +; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a3, a4 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a0) +; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB16_1 ; RV32IA-WMO-ZACAS-NEXT: .LBB16_3: ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-LABEL: cmpxchg_i16_acq_rel_acquire: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: lui a3, 16 -; RV32IA-TSO-NEXT: andi a4, a0, -4 -; RV32IA-TSO-NEXT: andi a0, a0, 3 +; RV32IA-TSO-NEXT: andi a4, a0, 3 ; RV32IA-TSO-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NEXT: sll a5, a3, a0 +; RV32IA-TSO-NEXT: slli a4, a4, 3 +; RV32IA-TSO-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NEXT: sll a5, a3, a4 ; RV32IA-TSO-NEXT: and a1, a1, a3 ; RV32IA-TSO-NEXT: and a2, a2, a3 -; RV32IA-TSO-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: sll a1, a1, a4 +; RV32IA-TSO-NEXT: sll a2, a2, a4 ; RV32IA-TSO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NEXT: lr.w a2, (a4) -; RV32IA-TSO-NEXT: and a3, a2, a5 -; RV32IA-TSO-NEXT: bne a3, a1, .LBB16_3 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: and a4, a3, a5 +; RV32IA-TSO-NEXT: bne a4, a1, .LBB16_3 ; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 -; RV32IA-TSO-NEXT: xor a3, a2, a0 -; RV32IA-TSO-NEXT: and a3, a3, a5 -; RV32IA-TSO-NEXT: xor a3, a2, a3 -; RV32IA-TSO-NEXT: sc.w a3, a3, (a4) -; RV32IA-TSO-NEXT: bnez a3, .LBB16_1 +; RV32IA-TSO-NEXT: xor a4, a3, a2 +; RV32IA-TSO-NEXT: and a4, a4, a5 +; RV32IA-TSO-NEXT: xor a4, a3, a4 +; RV32IA-TSO-NEXT: sc.w a4, a4, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB16_1 ; RV32IA-TSO-NEXT: .LBB16_3: ; RV32IA-TSO-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: ; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 
-4 -; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: slli a4, a4, 3 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a4 ; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 ; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a3 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-TSO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4) -; RV32IA-TSO-ZACAS-NEXT: and a3, a2, a5 -; RV32IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB16_3 +; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV32IA-TSO-ZACAS-NEXT: and a4, a3, a5 +; RV32IA-TSO-ZACAS-NEXT: bne a4, a1, .LBB16_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a0 -; RV32IA-TSO-ZACAS-NEXT: and a3, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a3 -; RV32IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4) -; RV32IA-TSO-ZACAS-NEXT: bnez a3, .LBB16_1 +; RV32IA-TSO-ZACAS-NEXT: xor a4, a3, a2 +; RV32IA-TSO-ZACAS-NEXT: and a4, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: xor a4, a3, a4 +; RV32IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a0) +; RV32IA-TSO-ZACAS-NEXT: bnez a4, .LBB16_1 ; RV32IA-TSO-ZACAS-NEXT: .LBB16_3: ; RV32IA-TSO-ZACAS-NEXT: ret ; @@ -3422,50 +3422,50 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64IA-WMO-LABEL: cmpxchg_i16_acq_rel_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: andi a4, a0, -4 -; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: andi a4, a0, 3 ; RV64IA-WMO-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-NEXT: slli a4, a4, 3 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a5, a3, a4 ; 
RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: and a2, a2, a3 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w.aq a2, (a4) -; RV64IA-WMO-NEXT: and a3, a2, a5 -; RV64IA-WMO-NEXT: bne a3, a1, .LBB16_3 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: and a4, a3, a5 +; RV64IA-WMO-NEXT: bne a4, a1, .LBB16_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 -; RV64IA-WMO-NEXT: xor a3, a2, a0 -; RV64IA-WMO-NEXT: and a3, a3, a5 -; RV64IA-WMO-NEXT: xor a3, a2, a3 -; RV64IA-WMO-NEXT: sc.w.rl a3, a3, (a4) -; RV64IA-WMO-NEXT: bnez a3, .LBB16_1 +; RV64IA-WMO-NEXT: xor a4, a3, a2 +; RV64IA-WMO-NEXT: and a4, a4, a5 +; RV64IA-WMO-NEXT: xor a4, a3, a4 +; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB16_1 ; RV64IA-WMO-NEXT: .LBB16_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire: ; RV64IA-WMO-ZACAS: # %bb.0: ; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: slli a4, a4, 3 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a4 ; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 ; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a3 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) -; RV64IA-WMO-ZACAS-NEXT: and a3, a2, a5 -; RV64IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB16_3 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-ZACAS-NEXT: and 
a4, a3, a5 +; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB16_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a0 -; RV64IA-WMO-ZACAS-NEXT: and a3, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a3 -; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a3, a3, (a4) -; RV64IA-WMO-ZACAS-NEXT: bnez a3, .LBB16_1 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a3, a2 +; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a0) +; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB16_1 ; RV64IA-WMO-ZACAS-NEXT: .LBB16_3: ; RV64IA-WMO-ZACAS-NEXT: ret ; @@ -3477,50 +3477,50 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64IA-TSO-LABEL: cmpxchg_i16_acq_rel_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: andi a4, a0, -4 -; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: andi a4, a0, 3 ; RV64IA-TSO-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-NEXT: slli a4, a4, 3 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a5, a3, a4 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: and a2, a2, a3 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w a2, (a4) -; RV64IA-TSO-NEXT: and a3, a2, a5 -; RV64IA-TSO-NEXT: bne a3, a1, .LBB16_3 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: and a4, a3, a5 +; RV64IA-TSO-NEXT: bne a4, a1, .LBB16_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 -; RV64IA-TSO-NEXT: xor a3, a2, a0 -; RV64IA-TSO-NEXT: and a3, a3, a5 -; RV64IA-TSO-NEXT: xor a3, a2, a3 -; RV64IA-TSO-NEXT: sc.w a3, a3, (a4) -; RV64IA-TSO-NEXT: bnez a3, .LBB16_1 +; RV64IA-TSO-NEXT: xor a4, a3, a2 +; RV64IA-TSO-NEXT: and a4, a4, a5 +; RV64IA-TSO-NEXT: xor a4, a3, a4 +; 
RV64IA-TSO-NEXT: sc.w a4, a4, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB16_1 ; RV64IA-TSO-NEXT: .LBB16_3: ; RV64IA-TSO-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: ; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: slli a4, a4, 3 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a4 ; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 ; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a3 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4) -; RV64IA-TSO-ZACAS-NEXT: and a3, a2, a5 -; RV64IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB16_3 +; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV64IA-TSO-ZACAS-NEXT: and a4, a3, a5 +; RV64IA-TSO-ZACAS-NEXT: bne a4, a1, .LBB16_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a0 -; RV64IA-TSO-ZACAS-NEXT: and a3, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a3 -; RV64IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4) -; RV64IA-TSO-ZACAS-NEXT: bnez a3, .LBB16_1 +; RV64IA-TSO-ZACAS-NEXT: xor a4, a3, a2 +; RV64IA-TSO-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a0) +; RV64IA-TSO-ZACAS-NEXT: bnez a4, .LBB16_1 ; RV64IA-TSO-ZACAS-NEXT: .LBB16_3: ; RV64IA-TSO-ZACAS-NEXT: ret ; @@ -3549,25 +3549,25 @@ define void @cmpxchg_i16_seq_cst_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV32IA-LABEL: cmpxchg_i16_seq_cst_monotonic: ; RV32IA: # %bb.0: ; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: andi a4, a0, -4 -; RV32IA-NEXT: andi 
a0, a0, 3 +; RV32IA-NEXT: andi a4, a0, 3 ; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: sll a5, a3, a0 +; RV32IA-NEXT: slli a4, a4, 3 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: sll a5, a3, a4 ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: and a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: sll a0, a2, a0 +; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: sll a2, a2, a4 ; RV32IA-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a2, (a4) -; RV32IA-NEXT: and a3, a2, a5 -; RV32IA-NEXT: bne a3, a1, .LBB17_3 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: and a4, a3, a5 +; RV32IA-NEXT: bne a4, a1, .LBB17_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 -; RV32IA-NEXT: xor a3, a2, a0 -; RV32IA-NEXT: and a3, a3, a5 -; RV32IA-NEXT: xor a3, a2, a3 -; RV32IA-NEXT: sc.w.rl a3, a3, (a4) -; RV32IA-NEXT: bnez a3, .LBB17_1 +; RV32IA-NEXT: xor a4, a3, a2 +; RV32IA-NEXT: and a4, a4, a5 +; RV32IA-NEXT: xor a4, a3, a4 +; RV32IA-NEXT: sc.w.rl a4, a4, (a0) +; RV32IA-NEXT: bnez a4, .LBB17_1 ; RV32IA-NEXT: .LBB17_3: ; RV32IA-NEXT: ret ; @@ -3587,50 +3587,50 @@ define void @cmpxchg_i16_seq_cst_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV64IA-WMO-LABEL: cmpxchg_i16_seq_cst_monotonic: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: andi a4, a0, -4 -; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: andi a4, a0, 3 ; RV64IA-WMO-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-NEXT: slli a4, a4, 3 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a5, a3, a4 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: and a2, a2, a3 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w.aqrl a2, (a4) -; RV64IA-WMO-NEXT: and a3, a2, a5 -; 
RV64IA-WMO-NEXT: bne a3, a1, .LBB17_3 +; RV64IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-WMO-NEXT: and a4, a3, a5 +; RV64IA-WMO-NEXT: bne a4, a1, .LBB17_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 -; RV64IA-WMO-NEXT: xor a3, a2, a0 -; RV64IA-WMO-NEXT: and a3, a3, a5 -; RV64IA-WMO-NEXT: xor a3, a2, a3 -; RV64IA-WMO-NEXT: sc.w.rl a3, a3, (a4) -; RV64IA-WMO-NEXT: bnez a3, .LBB17_1 +; RV64IA-WMO-NEXT: xor a4, a3, a2 +; RV64IA-WMO-NEXT: and a4, a4, a5 +; RV64IA-WMO-NEXT: xor a4, a3, a4 +; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB17_1 ; RV64IA-WMO-NEXT: .LBB17_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-ZACAS-LABEL: cmpxchg_i16_seq_cst_monotonic: ; RV64IA-ZACAS: # %bb.0: ; RV64IA-ZACAS-NEXT: lui a3, 16 -; RV64IA-ZACAS-NEXT: andi a4, a0, -4 -; RV64IA-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-ZACAS-NEXT: andi a4, a0, 3 ; RV64IA-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-ZACAS-NEXT: slli a4, a4, 3 +; RV64IA-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-ZACAS-NEXT: sllw a5, a3, a4 ; RV64IA-ZACAS-NEXT: and a1, a1, a3 ; RV64IA-ZACAS-NEXT: and a2, a2, a3 -; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-ZACAS-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w.aqrl a2, (a4) -; RV64IA-ZACAS-NEXT: and a3, a2, a5 -; RV64IA-ZACAS-NEXT: bne a3, a1, .LBB17_3 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-ZACAS-NEXT: and a4, a3, a5 +; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB17_3 ; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 -; RV64IA-ZACAS-NEXT: xor a3, a2, a0 -; RV64IA-ZACAS-NEXT: and a3, a3, a5 -; RV64IA-ZACAS-NEXT: xor a3, a2, a3 -; RV64IA-ZACAS-NEXT: sc.w.rl a3, a3, (a4) -; RV64IA-ZACAS-NEXT: bnez a3, .LBB17_1 +; RV64IA-ZACAS-NEXT: xor a4, a3, a2 +; RV64IA-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-ZACAS-NEXT: xor a4, a3, a4 +; 
RV64IA-ZACAS-NEXT: sc.w.rl a4, a4, (a0) +; RV64IA-ZACAS-NEXT: bnez a4, .LBB17_1 ; RV64IA-ZACAS-NEXT: .LBB17_3: ; RV64IA-ZACAS-NEXT: ret ; @@ -3642,25 +3642,25 @@ define void @cmpxchg_i16_seq_cst_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; RV64IA-TSO-LABEL: cmpxchg_i16_seq_cst_monotonic: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: andi a4, a0, -4 -; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: andi a4, a0, 3 ; RV64IA-TSO-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-NEXT: slli a4, a4, 3 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a5, a3, a4 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: and a2, a2, a3 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w.aqrl a2, (a4) -; RV64IA-TSO-NEXT: and a3, a2, a5 -; RV64IA-TSO-NEXT: bne a3, a1, .LBB17_3 +; RV64IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-TSO-NEXT: and a4, a3, a5 +; RV64IA-TSO-NEXT: bne a4, a1, .LBB17_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 -; RV64IA-TSO-NEXT: xor a3, a2, a0 -; RV64IA-TSO-NEXT: and a3, a3, a5 -; RV64IA-TSO-NEXT: xor a3, a2, a3 -; RV64IA-TSO-NEXT: sc.w.rl a3, a3, (a4) -; RV64IA-TSO-NEXT: bnez a3, .LBB17_1 +; RV64IA-TSO-NEXT: xor a4, a3, a2 +; RV64IA-TSO-NEXT: and a4, a4, a5 +; RV64IA-TSO-NEXT: xor a4, a3, a4 +; RV64IA-TSO-NEXT: sc.w.rl a4, a4, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB17_1 ; RV64IA-TSO-NEXT: .LBB17_3: ; RV64IA-TSO-NEXT: ret ; @@ -3689,25 +3689,25 @@ define void @cmpxchg_i16_seq_cst_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV32IA-LABEL: cmpxchg_i16_seq_cst_acquire: ; RV32IA: # %bb.0: ; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: andi a4, a0, -4 -; RV32IA-NEXT: andi a0, a0, 3 +; RV32IA-NEXT: andi a4, a0, 3 ; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: slli a0, 
a0, 3 -; RV32IA-NEXT: sll a5, a3, a0 +; RV32IA-NEXT: slli a4, a4, 3 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: sll a5, a3, a4 ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: and a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: sll a0, a2, a0 +; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: sll a2, a2, a4 ; RV32IA-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a2, (a4) -; RV32IA-NEXT: and a3, a2, a5 -; RV32IA-NEXT: bne a3, a1, .LBB18_3 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: and a4, a3, a5 +; RV32IA-NEXT: bne a4, a1, .LBB18_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 -; RV32IA-NEXT: xor a3, a2, a0 -; RV32IA-NEXT: and a3, a3, a5 -; RV32IA-NEXT: xor a3, a2, a3 -; RV32IA-NEXT: sc.w.rl a3, a3, (a4) -; RV32IA-NEXT: bnez a3, .LBB18_1 +; RV32IA-NEXT: xor a4, a3, a2 +; RV32IA-NEXT: and a4, a4, a5 +; RV32IA-NEXT: xor a4, a3, a4 +; RV32IA-NEXT: sc.w.rl a4, a4, (a0) +; RV32IA-NEXT: bnez a4, .LBB18_1 ; RV32IA-NEXT: .LBB18_3: ; RV32IA-NEXT: ret ; @@ -3727,50 +3727,50 @@ define void @cmpxchg_i16_seq_cst_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64IA-WMO-LABEL: cmpxchg_i16_seq_cst_acquire: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: andi a4, a0, -4 -; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: andi a4, a0, 3 ; RV64IA-WMO-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-NEXT: slli a4, a4, 3 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a5, a3, a4 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: and a2, a2, a3 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w.aqrl a2, (a4) -; RV64IA-WMO-NEXT: and a3, a2, a5 -; RV64IA-WMO-NEXT: bne a3, a1, .LBB18_3 +; RV64IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-WMO-NEXT: and a4, 
a3, a5 +; RV64IA-WMO-NEXT: bne a4, a1, .LBB18_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 -; RV64IA-WMO-NEXT: xor a3, a2, a0 -; RV64IA-WMO-NEXT: and a3, a3, a5 -; RV64IA-WMO-NEXT: xor a3, a2, a3 -; RV64IA-WMO-NEXT: sc.w.rl a3, a3, (a4) -; RV64IA-WMO-NEXT: bnez a3, .LBB18_1 +; RV64IA-WMO-NEXT: xor a4, a3, a2 +; RV64IA-WMO-NEXT: and a4, a4, a5 +; RV64IA-WMO-NEXT: xor a4, a3, a4 +; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB18_1 ; RV64IA-WMO-NEXT: .LBB18_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-ZACAS-LABEL: cmpxchg_i16_seq_cst_acquire: ; RV64IA-ZACAS: # %bb.0: ; RV64IA-ZACAS-NEXT: lui a3, 16 -; RV64IA-ZACAS-NEXT: andi a4, a0, -4 -; RV64IA-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-ZACAS-NEXT: andi a4, a0, 3 ; RV64IA-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-ZACAS-NEXT: slli a4, a4, 3 +; RV64IA-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-ZACAS-NEXT: sllw a5, a3, a4 ; RV64IA-ZACAS-NEXT: and a1, a1, a3 ; RV64IA-ZACAS-NEXT: and a2, a2, a3 -; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-ZACAS-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w.aqrl a2, (a4) -; RV64IA-ZACAS-NEXT: and a3, a2, a5 -; RV64IA-ZACAS-NEXT: bne a3, a1, .LBB18_3 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-ZACAS-NEXT: and a4, a3, a5 +; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB18_3 ; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 -; RV64IA-ZACAS-NEXT: xor a3, a2, a0 -; RV64IA-ZACAS-NEXT: and a3, a3, a5 -; RV64IA-ZACAS-NEXT: xor a3, a2, a3 -; RV64IA-ZACAS-NEXT: sc.w.rl a3, a3, (a4) -; RV64IA-ZACAS-NEXT: bnez a3, .LBB18_1 +; RV64IA-ZACAS-NEXT: xor a4, a3, a2 +; RV64IA-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-ZACAS-NEXT: sc.w.rl a4, a4, (a0) +; RV64IA-ZACAS-NEXT: bnez a4, .LBB18_1 ; RV64IA-ZACAS-NEXT: .LBB18_3: ; 
RV64IA-ZACAS-NEXT: ret ; @@ -3782,25 +3782,25 @@ define void @cmpxchg_i16_seq_cst_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64IA-TSO-LABEL: cmpxchg_i16_seq_cst_acquire: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: andi a4, a0, -4 -; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: andi a4, a0, 3 ; RV64IA-TSO-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-NEXT: slli a4, a4, 3 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a5, a3, a4 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: and a2, a2, a3 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w.aqrl a2, (a4) -; RV64IA-TSO-NEXT: and a3, a2, a5 -; RV64IA-TSO-NEXT: bne a3, a1, .LBB18_3 +; RV64IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-TSO-NEXT: and a4, a3, a5 +; RV64IA-TSO-NEXT: bne a4, a1, .LBB18_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 -; RV64IA-TSO-NEXT: xor a3, a2, a0 -; RV64IA-TSO-NEXT: and a3, a3, a5 -; RV64IA-TSO-NEXT: xor a3, a2, a3 -; RV64IA-TSO-NEXT: sc.w.rl a3, a3, (a4) -; RV64IA-TSO-NEXT: bnez a3, .LBB18_1 +; RV64IA-TSO-NEXT: xor a4, a3, a2 +; RV64IA-TSO-NEXT: and a4, a4, a5 +; RV64IA-TSO-NEXT: xor a4, a3, a4 +; RV64IA-TSO-NEXT: sc.w.rl a4, a4, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB18_1 ; RV64IA-TSO-NEXT: .LBB18_3: ; RV64IA-TSO-NEXT: ret ; @@ -3829,25 +3829,25 @@ define void @cmpxchg_i16_seq_cst_seq_cst(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV32IA-LABEL: cmpxchg_i16_seq_cst_seq_cst: ; RV32IA: # %bb.0: ; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: andi a4, a0, -4 -; RV32IA-NEXT: andi a0, a0, 3 +; RV32IA-NEXT: andi a4, a0, 3 ; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: sll a5, a3, a0 +; RV32IA-NEXT: slli a4, a4, 3 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: 
sll a5, a3, a4 ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: and a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: sll a0, a2, a0 +; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: sll a2, a2, a4 ; RV32IA-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a2, (a4) -; RV32IA-NEXT: and a3, a2, a5 -; RV32IA-NEXT: bne a3, a1, .LBB19_3 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: and a4, a3, a5 +; RV32IA-NEXT: bne a4, a1, .LBB19_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 -; RV32IA-NEXT: xor a3, a2, a0 -; RV32IA-NEXT: and a3, a3, a5 -; RV32IA-NEXT: xor a3, a2, a3 -; RV32IA-NEXT: sc.w.rl a3, a3, (a4) -; RV32IA-NEXT: bnez a3, .LBB19_1 +; RV32IA-NEXT: xor a4, a3, a2 +; RV32IA-NEXT: and a4, a4, a5 +; RV32IA-NEXT: xor a4, a3, a4 +; RV32IA-NEXT: sc.w.rl a4, a4, (a0) +; RV32IA-NEXT: bnez a4, .LBB19_1 ; RV32IA-NEXT: .LBB19_3: ; RV32IA-NEXT: ret ; @@ -3867,50 +3867,50 @@ define void @cmpxchg_i16_seq_cst_seq_cst(ptr %ptr, i16 %cmp, i16 %val) nounwind ; RV64IA-WMO-LABEL: cmpxchg_i16_seq_cst_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: lui a3, 16 -; RV64IA-WMO-NEXT: andi a4, a0, -4 -; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: andi a4, a0, 3 ; RV64IA-WMO-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-NEXT: slli a4, a4, 3 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a5, a3, a4 ; RV64IA-WMO-NEXT: and a1, a1, a3 ; RV64IA-WMO-NEXT: and a2, a2, a3 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w.aqrl a2, (a4) -; RV64IA-WMO-NEXT: and a3, a2, a5 -; RV64IA-WMO-NEXT: bne a3, a1, .LBB19_3 +; RV64IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-WMO-NEXT: and a4, a3, a5 +; RV64IA-WMO-NEXT: bne a4, a1, .LBB19_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 -; 
RV64IA-WMO-NEXT: xor a3, a2, a0 -; RV64IA-WMO-NEXT: and a3, a3, a5 -; RV64IA-WMO-NEXT: xor a3, a2, a3 -; RV64IA-WMO-NEXT: sc.w.rl a3, a3, (a4) -; RV64IA-WMO-NEXT: bnez a3, .LBB19_1 +; RV64IA-WMO-NEXT: xor a4, a3, a2 +; RV64IA-WMO-NEXT: and a4, a4, a5 +; RV64IA-WMO-NEXT: xor a4, a3, a4 +; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB19_1 ; RV64IA-WMO-NEXT: .LBB19_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-ZACAS-LABEL: cmpxchg_i16_seq_cst_seq_cst: ; RV64IA-ZACAS: # %bb.0: ; RV64IA-ZACAS-NEXT: lui a3, 16 -; RV64IA-ZACAS-NEXT: andi a4, a0, -4 -; RV64IA-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-ZACAS-NEXT: andi a4, a0, 3 ; RV64IA-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-ZACAS-NEXT: slli a4, a4, 3 +; RV64IA-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-ZACAS-NEXT: sllw a5, a3, a4 ; RV64IA-ZACAS-NEXT: and a1, a1, a3 ; RV64IA-ZACAS-NEXT: and a2, a2, a3 -; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-ZACAS-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w.aqrl a2, (a4) -; RV64IA-ZACAS-NEXT: and a3, a2, a5 -; RV64IA-ZACAS-NEXT: bne a3, a1, .LBB19_3 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-ZACAS-NEXT: and a4, a3, a5 +; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB19_3 ; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 -; RV64IA-ZACAS-NEXT: xor a3, a2, a0 -; RV64IA-ZACAS-NEXT: and a3, a3, a5 -; RV64IA-ZACAS-NEXT: xor a3, a2, a3 -; RV64IA-ZACAS-NEXT: sc.w.rl a3, a3, (a4) -; RV64IA-ZACAS-NEXT: bnez a3, .LBB19_1 +; RV64IA-ZACAS-NEXT: xor a4, a3, a2 +; RV64IA-ZACAS-NEXT: and a4, a4, a5 +; RV64IA-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-ZACAS-NEXT: sc.w.rl a4, a4, (a0) +; RV64IA-ZACAS-NEXT: bnez a4, .LBB19_1 ; RV64IA-ZACAS-NEXT: .LBB19_3: ; RV64IA-ZACAS-NEXT: ret ; @@ -3923,25 +3923,25 @@ define void @cmpxchg_i16_seq_cst_seq_cst(ptr %ptr, i16 %cmp, i16 
%val) nounwind ; RV64IA-TSO-LABEL: cmpxchg_i16_seq_cst_seq_cst: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: lui a3, 16 -; RV64IA-TSO-NEXT: andi a4, a0, -4 -; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: andi a4, a0, 3 ; RV64IA-TSO-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-NEXT: slli a4, a4, 3 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a5, a3, a4 ; RV64IA-TSO-NEXT: and a1, a1, a3 ; RV64IA-TSO-NEXT: and a2, a2, a3 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w.aqrl a2, (a4) -; RV64IA-TSO-NEXT: and a3, a2, a5 -; RV64IA-TSO-NEXT: bne a3, a1, .LBB19_3 +; RV64IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-TSO-NEXT: and a4, a3, a5 +; RV64IA-TSO-NEXT: bne a4, a1, .LBB19_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 -; RV64IA-TSO-NEXT: xor a3, a2, a0 -; RV64IA-TSO-NEXT: and a3, a3, a5 -; RV64IA-TSO-NEXT: xor a3, a2, a3 -; RV64IA-TSO-NEXT: sc.w.rl a3, a3, (a4) -; RV64IA-TSO-NEXT: bnez a3, .LBB19_1 +; RV64IA-TSO-NEXT: xor a4, a3, a2 +; RV64IA-TSO-NEXT: and a4, a4, a5 +; RV64IA-TSO-NEXT: xor a4, a3, a4 +; RV64IA-TSO-NEXT: sc.w.rl a4, a4, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB19_1 ; RV64IA-TSO-NEXT: .LBB19_3: ; RV64IA-TSO-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/atomicrmw-add-sub.ll b/llvm/test/CodeGen/RISCV/GlobalISel/atomicrmw-add-sub.ll index 1294bcf5b7500..41b3655429cb1 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/atomicrmw-add-sub.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/atomicrmw-add-sub.ll @@ -17,8 +17,8 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) nounwind { ; RV32IA-NEXT: li a2, 255 ; RV32IA-NEXT: andi a3, a0, -4 ; RV32IA-NEXT: andi a0, a0, 3 -; RV32IA-NEXT: zext.b a1, a1 ; RV32IA-NEXT: slli a0, a0, 3 +; RV32IA-NEXT: zext.b a1, a1 ; RV32IA-NEXT: sll a2, a2, 
a0 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 @@ -53,8 +53,8 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) nounwind { ; RV64IA-NEXT: li a2, 255 ; RV64IA-NEXT: andi a3, a0, -4 ; RV64IA-NEXT: andi a0, a0, 3 -; RV64IA-NEXT: zext.b a1, a1 ; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: zext.b a1, a1 ; RV64IA-NEXT: sllw a2, a2, a0 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 @@ -91,23 +91,23 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) nounwind { ; RV32IA-LABEL: atomicrmw_add_i16: ; RV32IA: # %bb.0: ; RV32IA-NEXT: lui a2, 16 +; RV32IA-NEXT: addi a2, a2, -1 ; RV32IA-NEXT: andi a3, a0, -4 ; RV32IA-NEXT: andi a0, a0, 3 -; RV32IA-NEXT: addi a2, a2, -1 ; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: sll a4, a2, a0 ; RV32IA-NEXT: and a1, a1, a2 +; RV32IA-NEXT: sll a2, a2, a0 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a2, (a3) -; RV32IA-NEXT: add a5, a2, a1 -; RV32IA-NEXT: xor a5, a2, a5 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a2, a5 +; RV32IA-NEXT: lr.w.aqrl a4, (a3) +; RV32IA-NEXT: add a5, a4, a1 +; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: and a5, a5, a2 +; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-NEXT: bnez a5, .LBB1_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a2, a0 +; RV32IA-NEXT: srl a0, a4, a0 ; RV32IA-NEXT: ret ; ; RV32I-LABEL: atomicrmw_add_i16: @@ -128,23 +128,23 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) nounwind { ; RV64IA-LABEL: atomicrmw_add_i16: ; RV64IA: # %bb.0: ; RV64IA-NEXT: lui a2, 16 +; RV64IA-NEXT: addi a2, a2, -1 ; RV64IA-NEXT: andi a3, a0, -4 ; RV64IA-NEXT: andi a0, a0, 3 -; RV64IA-NEXT: addi a2, a2, -1 ; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: sllw a4, a2, a0 ; RV64IA-NEXT: and a1, a1, a2 +; RV64IA-NEXT: sllw a2, a2, a0 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 -; 
RV64IA-NEXT: lr.w.aqrl a2, (a3) -; RV64IA-NEXT: add a5, a2, a1 -; RV64IA-NEXT: xor a5, a2, a5 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a2, a5 +; RV64IA-NEXT: lr.w.aqrl a4, (a3) +; RV64IA-NEXT: add a5, a4, a1 +; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: and a5, a5, a2 +; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-NEXT: bnez a5, .LBB1_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a2, a0 +; RV64IA-NEXT: srlw a0, a4, a0 ; RV64IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_add_i16: @@ -270,8 +270,8 @@ define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) nounwind { ; RV32IA-NEXT: li a2, 255 ; RV32IA-NEXT: andi a3, a0, -4 ; RV32IA-NEXT: andi a0, a0, 3 -; RV32IA-NEXT: zext.b a1, a1 ; RV32IA-NEXT: slli a0, a0, 3 +; RV32IA-NEXT: zext.b a1, a1 ; RV32IA-NEXT: sll a2, a2, a0 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 @@ -307,8 +307,8 @@ define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) nounwind { ; RV64IA-NEXT: li a2, 255 ; RV64IA-NEXT: andi a3, a0, -4 ; RV64IA-NEXT: andi a0, a0, 3 -; RV64IA-NEXT: zext.b a1, a1 ; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: zext.b a1, a1 ; RV64IA-NEXT: sllw a2, a2, a0 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 @@ -346,23 +346,23 @@ define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) nounwind { ; RV32IA-LABEL: atomicrmw_sub_i16: ; RV32IA: # %bb.0: ; RV32IA-NEXT: lui a2, 16 +; RV32IA-NEXT: addi a2, a2, -1 ; RV32IA-NEXT: andi a3, a0, -4 ; RV32IA-NEXT: andi a0, a0, 3 -; RV32IA-NEXT: addi a2, a2, -1 ; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: sll a4, a2, a0 ; RV32IA-NEXT: and a1, a1, a2 +; RV32IA-NEXT: sll a2, a2, a0 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a2, (a3) -; RV32IA-NEXT: sub a5, a2, a1 -; RV32IA-NEXT: xor a5, a2, a5 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a2, a5 +; RV32IA-NEXT: lr.w.aqrl a4, (a3) +; RV32IA-NEXT: sub a5, a4, 
a1 +; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: and a5, a5, a2 +; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-NEXT: bnez a5, .LBB5_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a2, a0 +; RV32IA-NEXT: srl a0, a4, a0 ; RV32IA-NEXT: ret ; ; RV32I-LABEL: atomicrmw_sub_i16: @@ -384,23 +384,23 @@ define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) nounwind { ; RV64IA-LABEL: atomicrmw_sub_i16: ; RV64IA: # %bb.0: ; RV64IA-NEXT: lui a2, 16 +; RV64IA-NEXT: addi a2, a2, -1 ; RV64IA-NEXT: andi a3, a0, -4 ; RV64IA-NEXT: andi a0, a0, 3 -; RV64IA-NEXT: addi a2, a2, -1 ; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: sllw a4, a2, a0 ; RV64IA-NEXT: and a1, a1, a2 +; RV64IA-NEXT: sllw a2, a2, a0 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a2, (a3) -; RV64IA-NEXT: sub a5, a2, a1 -; RV64IA-NEXT: xor a5, a2, a5 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a2, a5 +; RV64IA-NEXT: lr.w.aqrl a4, (a3) +; RV64IA-NEXT: sub a5, a4, a1 +; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: and a5, a5, a2 +; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-NEXT: bnez a5, .LBB5_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a2, a0 +; RV64IA-NEXT: srlw a0, a4, a0 ; RV64IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_sub_i16: @@ -531,22 +531,22 @@ define i16 @atomicrmw_sub_i16_constant(ptr %a) nounwind { ; RV32IA: # %bb.0: ; RV32IA-NEXT: lui a1, 16 ; RV32IA-NEXT: li a2, 1 -; RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: andi a0, a0, 3 +; RV32IA-NEXT: andi a3, a0, 3 ; RV32IA-NEXT: addi a1, a1, -1 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: sll a2, a2, a0 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: slli a3, a3, 3 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a4, (a3) +; RV32IA-NEXT: lr.w.aqrl a4, (a0) ; RV32IA-NEXT: sub a5, a4, a2 ; 
RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: and a5, a5, a1 ; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB8_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a0 +; RV32IA-NEXT: srl a0, a4, a3 ; RV32IA-NEXT: ret ; ; RV32I-LABEL: atomicrmw_sub_i16_constant: @@ -571,22 +571,22 @@ define i16 @atomicrmw_sub_i16_constant(ptr %a) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: lui a1, 16 ; RV64IA-NEXT: li a2, 1 -; RV64IA-NEXT: andi a3, a0, -4 -; RV64IA-NEXT: andi a0, a0, 3 +; RV64IA-NEXT: andi a3, a0, 3 ; RV64IA-NEXT: addi a1, a1, -1 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: sllw a2, a2, a0 +; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a4, (a3) +; RV64IA-NEXT: lr.w.aqrl a4, (a0) ; RV64IA-NEXT: sub a5, a4, a2 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: and a5, a5, a1 ; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB8_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a0 +; RV64IA-NEXT: srlw a0, a4, a3 ; RV64IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_sub_i16_constant: @@ -614,21 +614,21 @@ define i8 @atomicrmw_sub_i8_constant(ptr %a) nounwind { ; RV32IA: # %bb.0: ; RV32IA-NEXT: li a1, 255 ; RV32IA-NEXT: li a2, 1 -; RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: andi a0, a0, 3 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: sll a2, a2, a0 +; RV32IA-NEXT: andi a3, a0, 3 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: slli a3, a3, 3 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a4, (a3) +; RV32IA-NEXT: lr.w.aqrl a4, (a0) ; RV32IA-NEXT: sub a5, a4, a2 ; RV32IA-NEXT: xor a5, a4, a5 ; 
RV32IA-NEXT: and a5, a5, a1 ; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB9_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a0 +; RV32IA-NEXT: srl a0, a4, a3 ; RV32IA-NEXT: ret ; ; RV32I-LABEL: atomicrmw_sub_i8_constant: @@ -653,21 +653,21 @@ define i8 @atomicrmw_sub_i8_constant(ptr %a) nounwind { ; RV64IA: # %bb.0: ; RV64IA-NEXT: li a1, 255 ; RV64IA-NEXT: li a2, 1 -; RV64IA-NEXT: andi a3, a0, -4 -; RV64IA-NEXT: andi a0, a0, 3 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: sllw a2, a2, a0 +; RV64IA-NEXT: andi a3, a0, 3 +; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a4, (a3) +; RV64IA-NEXT: lr.w.aqrl a4, (a0) ; RV64IA-NEXT: sub a5, a4, a2 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: and a5, a5, a1 ; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB9_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a4, a0 +; RV64IA-NEXT: srlw a0, a4, a3 ; RV64IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_sub_i8_constant: @@ -859,12 +859,11 @@ define i64 @atomicrmw_sub_i64_neg(ptr %a, i64 %x, i64 %y) nounwind { ; RV32IA-ZABHA: # %bb.0: ; RV32IA-ZABHA-NEXT: addi sp, sp, -16 ; RV32IA-ZABHA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IA-ZABHA-NEXT: sub a5, a1, a3 -; RV32IA-ZABHA-NEXT: sltu a1, a1, a3 +; RV32IA-ZABHA-NEXT: sltu a5, a1, a3 ; RV32IA-ZABHA-NEXT: sub a2, a2, a4 -; RV32IA-ZABHA-NEXT: sub a2, a2, a1 +; RV32IA-ZABHA-NEXT: sub a1, a1, a3 +; RV32IA-ZABHA-NEXT: sub a2, a2, a5 ; RV32IA-ZABHA-NEXT: li a3, 5 -; RV32IA-ZABHA-NEXT: mv a1, a5 ; RV32IA-ZABHA-NEXT: call __atomic_fetch_sub_8 ; RV32IA-ZABHA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IA-ZABHA-NEXT: addi sp, sp, 16 @@ -874,12 +873,11 @@ define i64 
@atomicrmw_sub_i64_neg(ptr %a, i64 %x, i64 %y) nounwind { ; RV32IA: # %bb.0: ; RV32IA-NEXT: addi sp, sp, -16 ; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IA-NEXT: sub a5, a1, a3 -; RV32IA-NEXT: sltu a1, a1, a3 +; RV32IA-NEXT: sltu a5, a1, a3 ; RV32IA-NEXT: sub a2, a2, a4 -; RV32IA-NEXT: sub a2, a2, a1 +; RV32IA-NEXT: sub a1, a1, a3 +; RV32IA-NEXT: sub a2, a2, a5 ; RV32IA-NEXT: li a3, 5 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: call __atomic_fetch_sub_8 ; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IA-NEXT: addi sp, sp, 16 @@ -889,12 +887,11 @@ define i64 @atomicrmw_sub_i64_neg(ptr %a, i64 %x, i64 %y) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sub a5, a1, a3 -; RV32I-NEXT: sltu a1, a1, a3 +; RV32I-NEXT: sltu a5, a1, a3 ; RV32I-NEXT: sub a2, a2, a4 -; RV32I-NEXT: sub a2, a2, a1 +; RV32I-NEXT: sub a1, a1, a3 +; RV32I-NEXT: sub a2, a2, a5 ; RV32I-NEXT: li a3, 5 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: call __atomic_fetch_sub_8 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll b/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll index 68bc1e5db6095..6fab72dcfe349 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll @@ -58,10 +58,10 @@ define i4 @bitreverse_i4(i4 %x) { ; RV32-NEXT: andi a0, a0, 15 ; RV32-NEXT: andi a1, a1, 8 ; RV32-NEXT: andi a2, a2, 4 +; RV32-NEXT: srli a3, a0, 1 ; RV32-NEXT: or a1, a1, a2 -; RV32-NEXT: srli a2, a0, 1 -; RV32-NEXT: andi a2, a2, 2 -; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: andi a3, a3, 2 +; RV32-NEXT: or a1, a1, a3 ; RV32-NEXT: srli a0, a0, 3 ; RV32-NEXT: or a0, a1, a0 ; RV32-NEXT: ret @@ -73,10 +73,10 @@ define i4 @bitreverse_i4(i4 %x) { ; RV64-NEXT: andi a0, a0, 15 ; RV64-NEXT: andi a1, a1, 8 ; RV64-NEXT: andi a2, a2, 4 +; RV64-NEXT: srli a3, a0, 1 ; RV64-NEXT: or a1, a1, a2 -; RV64-NEXT: 
srli a2, a0, 1 -; RV64-NEXT: andi a2, a2, 2 -; RV64-NEXT: or a1, a1, a2 +; RV64-NEXT: andi a3, a3, 2 +; RV64-NEXT: or a1, a1, a3 ; RV64-NEXT: srli a0, a0, 3 ; RV64-NEXT: or a0, a1, a0 ; RV64-NEXT: ret @@ -89,20 +89,20 @@ define i7 @bitreverse_i7(i7 %x) { ; RV32: # %bb.0: ; RV32-NEXT: slli a1, a0, 6 ; RV32-NEXT: slli a2, a0, 4 -; RV32-NEXT: slli a3, a0, 2 -; RV32-NEXT: andi a0, a0, 127 ; RV32-NEXT: andi a1, a1, 64 ; RV32-NEXT: andi a2, a2, 32 +; RV32-NEXT: slli a3, a0, 2 +; RV32-NEXT: andi a0, a0, 127 ; RV32-NEXT: andi a3, a3, 16 +; RV32-NEXT: andi a4, a0, 8 ; RV32-NEXT: or a1, a1, a2 -; RV32-NEXT: andi a2, a0, 8 -; RV32-NEXT: or a2, a3, a2 -; RV32-NEXT: srli a3, a0, 2 -; RV32-NEXT: or a1, a1, a2 -; RV32-NEXT: srli a2, a0, 4 -; RV32-NEXT: andi a3, a3, 4 -; RV32-NEXT: andi a2, a2, 2 -; RV32-NEXT: or a2, a3, a2 +; RV32-NEXT: or a3, a3, a4 +; RV32-NEXT: srli a2, a0, 2 +; RV32-NEXT: srli a4, a0, 4 +; RV32-NEXT: andi a2, a2, 4 +; RV32-NEXT: andi a4, a4, 2 +; RV32-NEXT: or a1, a1, a3 +; RV32-NEXT: or a2, a2, a4 ; RV32-NEXT: or a1, a1, a2 ; RV32-NEXT: srli a0, a0, 6 ; RV32-NEXT: or a0, a1, a0 @@ -112,20 +112,20 @@ define i7 @bitreverse_i7(i7 %x) { ; RV64: # %bb.0: ; RV64-NEXT: slli a1, a0, 6 ; RV64-NEXT: slli a2, a0, 4 -; RV64-NEXT: slli a3, a0, 2 -; RV64-NEXT: andi a0, a0, 127 ; RV64-NEXT: andi a1, a1, 64 ; RV64-NEXT: andi a2, a2, 32 +; RV64-NEXT: slli a3, a0, 2 +; RV64-NEXT: andi a0, a0, 127 ; RV64-NEXT: andi a3, a3, 16 +; RV64-NEXT: andi a4, a0, 8 ; RV64-NEXT: or a1, a1, a2 -; RV64-NEXT: andi a2, a0, 8 -; RV64-NEXT: or a2, a3, a2 -; RV64-NEXT: srli a3, a0, 2 -; RV64-NEXT: or a1, a1, a2 -; RV64-NEXT: srli a2, a0, 4 -; RV64-NEXT: andi a3, a3, 4 -; RV64-NEXT: andi a2, a2, 2 -; RV64-NEXT: or a2, a3, a2 +; RV64-NEXT: or a3, a3, a4 +; RV64-NEXT: srli a2, a0, 2 +; RV64-NEXT: srli a4, a0, 4 +; RV64-NEXT: andi a2, a2, 4 +; RV64-NEXT: andi a4, a4, 2 +; RV64-NEXT: or a1, a1, a3 +; RV64-NEXT: or a2, a2, a4 ; RV64-NEXT: or a1, a1, a2 ; RV64-NEXT: srli a0, a0, 6 ; RV64-NEXT: or a0, 
a1, a0 @@ -137,70 +137,70 @@ define i7 @bitreverse_i7(i7 %x) { define i24 @bitreverse_i24(i24 %x) { ; RV32-LABEL: bitreverse_i24: ; RV32: # %bb.0: -; RV32-NEXT: slli a1, a0, 16 -; RV32-NEXT: lui a2, 4096 +; RV32-NEXT: lui a1, 4096 +; RV32-NEXT: slli a2, a0, 16 +; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: lui a3, 1048335 -; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: addi a3, a3, 240 -; RV32-NEXT: and a0, a0, a2 ; RV32-NEXT: srli a0, a0, 16 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: and a1, a3, a2 -; RV32-NEXT: and a1, a0, a1 +; RV32-NEXT: or a0, a0, a2 +; RV32-NEXT: and a2, a3, a1 +; RV32-NEXT: and a2, a0, a2 ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: lui a3, 1047757 +; RV32-NEXT: srli a2, a2, 4 ; RV32-NEXT: addi a3, a3, -820 -; RV32-NEXT: srli a1, a1, 4 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: and a1, a3, a2 -; RV32-NEXT: and a1, a0, a1 +; RV32-NEXT: or a0, a2, a0 +; RV32-NEXT: and a2, a3, a1 +; RV32-NEXT: and a2, a0, a2 ; RV32-NEXT: slli a0, a0, 2 ; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: lui a3, 1047211 +; RV32-NEXT: srli a2, a2, 2 ; RV32-NEXT: addi a3, a3, -1366 -; RV32-NEXT: and a2, a3, a2 -; RV32-NEXT: srli a1, a1, 2 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: and a2, a0, a2 +; RV32-NEXT: or a0, a2, a0 +; RV32-NEXT: and a1, a3, a1 +; RV32-NEXT: and a1, a0, a1 ; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: srli a2, a2, 1 +; RV32-NEXT: srli a1, a1, 1 ; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: or a0, a2, a0 +; RV32-NEXT: or a0, a1, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: bitreverse_i24: ; RV64: # %bb.0: -; RV64-NEXT: slli a1, a0, 16 -; RV64-NEXT: lui a2, 4096 +; RV64-NEXT: lui a1, 4096 +; RV64-NEXT: slli a2, a0, 16 +; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: lui a3, 1048335 -; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: addi a3, a3, 240 -; RV64-NEXT: and a0, a0, a2 ; RV64-NEXT: srli a0, a0, 16 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: and a1, a3, a2 -; RV64-NEXT: and a1, a0, a1 +; RV64-NEXT: or a0, a0, 
a2 +; RV64-NEXT: and a2, a3, a1 +; RV64-NEXT: and a2, a0, a2 ; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: and a0, a0, a3 ; RV64-NEXT: lui a3, 1047757 +; RV64-NEXT: srli a2, a2, 4 ; RV64-NEXT: addi a3, a3, -820 -; RV64-NEXT: srli a1, a1, 4 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: and a1, a3, a2 -; RV64-NEXT: and a1, a0, a1 +; RV64-NEXT: or a0, a2, a0 +; RV64-NEXT: and a2, a3, a1 +; RV64-NEXT: and a2, a0, a2 ; RV64-NEXT: slli a0, a0, 2 ; RV64-NEXT: and a0, a0, a3 ; RV64-NEXT: lui a3, 1047211 +; RV64-NEXT: srli a2, a2, 2 ; RV64-NEXT: addi a3, a3, -1366 -; RV64-NEXT: and a2, a3, a2 -; RV64-NEXT: srli a1, a1, 2 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: and a2, a0, a2 +; RV64-NEXT: or a0, a2, a0 +; RV64-NEXT: and a1, a3, a1 +; RV64-NEXT: and a1, a0, a1 ; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: srli a2, a2, 1 +; RV64-NEXT: srli a1, a1, 1 ; RV64-NEXT: and a0, a0, a3 -; RV64-NEXT: or a0, a2, a0 +; RV64-NEXT: or a0, a1, a0 ; RV64-NEXT: ret %rev = call i24 @llvm.bitreverse.i24(i24 %x) ret i24 %rev diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/combine-neg-abs.ll b/llvm/test/CodeGen/RISCV/GlobalISel/combine-neg-abs.ll index 3a55189076dee..2cd7311ea3653 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/combine-neg-abs.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/combine-neg-abs.ll @@ -95,48 +95,48 @@ define i64 @expanded_neg_abs64(i64 %x) { ; RV32I: # %bb.0: ; RV32I-NEXT: snez a2, a0 ; RV32I-NEXT: neg a3, a1 -; RV32I-NEXT: sub a2, a3, a2 -; RV32I-NEXT: neg a3, a0 -; RV32I-NEXT: beq a2, a1, .LBB2_2 +; RV32I-NEXT: sub a3, a3, a2 +; RV32I-NEXT: neg a2, a0 +; RV32I-NEXT: beq a3, a1, .LBB2_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt a4, a1, a2 +; RV32I-NEXT: slt a4, a1, a3 ; RV32I-NEXT: beqz a4, .LBB2_3 ; RV32I-NEXT: j .LBB2_4 ; RV32I-NEXT: .LBB2_2: -; RV32I-NEXT: sltu a4, a0, a3 +; RV32I-NEXT: sltu a4, a0, a2 ; RV32I-NEXT: bnez a4, .LBB2_4 ; RV32I-NEXT: .LBB2_3: -; RV32I-NEXT: mv a3, a0 -; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: .LBB2_4: -; 
RV32I-NEXT: neg a0, a3 -; RV32I-NEXT: snez a1, a3 -; RV32I-NEXT: neg a2, a2 -; RV32I-NEXT: sub a1, a2, a1 +; RV32I-NEXT: snez a1, a2 +; RV32I-NEXT: neg a3, a3 +; RV32I-NEXT: neg a0, a2 +; RV32I-NEXT: sub a1, a3, a1 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: expanded_neg_abs64: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: snez a2, a0 ; RV32ZBB-NEXT: neg a3, a1 -; RV32ZBB-NEXT: sub a2, a3, a2 -; RV32ZBB-NEXT: neg a3, a0 -; RV32ZBB-NEXT: beq a2, a1, .LBB2_2 +; RV32ZBB-NEXT: sub a3, a3, a2 +; RV32ZBB-NEXT: neg a2, a0 +; RV32ZBB-NEXT: beq a3, a1, .LBB2_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: slt a4, a1, a2 +; RV32ZBB-NEXT: slt a4, a1, a3 ; RV32ZBB-NEXT: beqz a4, .LBB2_3 ; RV32ZBB-NEXT: j .LBB2_4 ; RV32ZBB-NEXT: .LBB2_2: -; RV32ZBB-NEXT: sltu a4, a0, a3 +; RV32ZBB-NEXT: sltu a4, a0, a2 ; RV32ZBB-NEXT: bnez a4, .LBB2_4 ; RV32ZBB-NEXT: .LBB2_3: -; RV32ZBB-NEXT: mv a3, a0 -; RV32ZBB-NEXT: mv a2, a1 +; RV32ZBB-NEXT: mv a2, a0 +; RV32ZBB-NEXT: mv a3, a1 ; RV32ZBB-NEXT: .LBB2_4: -; RV32ZBB-NEXT: neg a0, a3 -; RV32ZBB-NEXT: snez a1, a3 -; RV32ZBB-NEXT: neg a2, a2 -; RV32ZBB-NEXT: sub a1, a2, a1 +; RV32ZBB-NEXT: snez a1, a2 +; RV32ZBB-NEXT: neg a3, a3 +; RV32ZBB-NEXT: neg a0, a2 +; RV32ZBB-NEXT: sub a1, a3, a1 ; RV32ZBB-NEXT: ret ; ; RV64I-LABEL: expanded_neg_abs64: @@ -165,48 +165,48 @@ define i64 @expanded_neg_abs64_unsigned(i64 %x) { ; RV32I: # %bb.0: ; RV32I-NEXT: snez a2, a0 ; RV32I-NEXT: neg a3, a1 -; RV32I-NEXT: sub a2, a3, a2 -; RV32I-NEXT: neg a3, a0 -; RV32I-NEXT: beq a2, a1, .LBB3_2 +; RV32I-NEXT: sub a3, a3, a2 +; RV32I-NEXT: neg a2, a0 +; RV32I-NEXT: beq a3, a1, .LBB3_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu a4, a1, a2 +; RV32I-NEXT: sltu a4, a1, a3 ; RV32I-NEXT: beqz a4, .LBB3_3 ; RV32I-NEXT: j .LBB3_4 ; RV32I-NEXT: .LBB3_2: -; RV32I-NEXT: sltu a4, a0, a3 +; RV32I-NEXT: sltu a4, a0, a2 ; RV32I-NEXT: bnez a4, .LBB3_4 ; RV32I-NEXT: .LBB3_3: -; RV32I-NEXT: mv a3, a0 -; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: .LBB3_4: -; 
RV32I-NEXT: neg a0, a3 -; RV32I-NEXT: snez a1, a3 -; RV32I-NEXT: neg a2, a2 -; RV32I-NEXT: sub a1, a2, a1 +; RV32I-NEXT: snez a1, a2 +; RV32I-NEXT: neg a3, a3 +; RV32I-NEXT: neg a0, a2 +; RV32I-NEXT: sub a1, a3, a1 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: expanded_neg_abs64_unsigned: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: snez a2, a0 ; RV32ZBB-NEXT: neg a3, a1 -; RV32ZBB-NEXT: sub a2, a3, a2 -; RV32ZBB-NEXT: neg a3, a0 -; RV32ZBB-NEXT: beq a2, a1, .LBB3_2 +; RV32ZBB-NEXT: sub a3, a3, a2 +; RV32ZBB-NEXT: neg a2, a0 +; RV32ZBB-NEXT: beq a3, a1, .LBB3_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu a4, a1, a2 +; RV32ZBB-NEXT: sltu a4, a1, a3 ; RV32ZBB-NEXT: beqz a4, .LBB3_3 ; RV32ZBB-NEXT: j .LBB3_4 ; RV32ZBB-NEXT: .LBB3_2: -; RV32ZBB-NEXT: sltu a4, a0, a3 +; RV32ZBB-NEXT: sltu a4, a0, a2 ; RV32ZBB-NEXT: bnez a4, .LBB3_4 ; RV32ZBB-NEXT: .LBB3_3: -; RV32ZBB-NEXT: mv a3, a0 -; RV32ZBB-NEXT: mv a2, a1 +; RV32ZBB-NEXT: mv a2, a0 +; RV32ZBB-NEXT: mv a3, a1 ; RV32ZBB-NEXT: .LBB3_4: -; RV32ZBB-NEXT: neg a0, a3 -; RV32ZBB-NEXT: snez a1, a3 -; RV32ZBB-NEXT: neg a2, a2 -; RV32ZBB-NEXT: sub a1, a2, a1 +; RV32ZBB-NEXT: snez a1, a2 +; RV32ZBB-NEXT: neg a3, a3 +; RV32ZBB-NEXT: neg a0, a2 +; RV32ZBB-NEXT: sub a1, a3, a1 ; RV32ZBB-NEXT: ret ; ; RV64I-LABEL: expanded_neg_abs64_unsigned: @@ -317,48 +317,48 @@ define i64 @expanded_neg_inv_abs64(i64 %x) { ; RV32I: # %bb.0: ; RV32I-NEXT: snez a2, a0 ; RV32I-NEXT: neg a3, a1 -; RV32I-NEXT: sub a2, a3, a2 -; RV32I-NEXT: neg a3, a0 -; RV32I-NEXT: beq a2, a1, .LBB6_2 +; RV32I-NEXT: sub a3, a3, a2 +; RV32I-NEXT: neg a2, a0 +; RV32I-NEXT: beq a3, a1, .LBB6_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt a4, a2, a1 +; RV32I-NEXT: slt a4, a3, a1 ; RV32I-NEXT: beqz a4, .LBB6_3 ; RV32I-NEXT: j .LBB6_4 ; RV32I-NEXT: .LBB6_2: -; RV32I-NEXT: sltu a4, a3, a0 +; RV32I-NEXT: sltu a4, a2, a0 ; RV32I-NEXT: bnez a4, .LBB6_4 ; RV32I-NEXT: .LBB6_3: -; RV32I-NEXT: mv a3, a0 -; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: .LBB6_4: 
-; RV32I-NEXT: neg a0, a3 -; RV32I-NEXT: snez a1, a3 -; RV32I-NEXT: neg a2, a2 -; RV32I-NEXT: sub a1, a2, a1 +; RV32I-NEXT: snez a1, a2 +; RV32I-NEXT: neg a3, a3 +; RV32I-NEXT: neg a0, a2 +; RV32I-NEXT: sub a1, a3, a1 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: expanded_neg_inv_abs64: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: snez a2, a0 ; RV32ZBB-NEXT: neg a3, a1 -; RV32ZBB-NEXT: sub a2, a3, a2 -; RV32ZBB-NEXT: neg a3, a0 -; RV32ZBB-NEXT: beq a2, a1, .LBB6_2 +; RV32ZBB-NEXT: sub a3, a3, a2 +; RV32ZBB-NEXT: neg a2, a0 +; RV32ZBB-NEXT: beq a3, a1, .LBB6_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: slt a4, a2, a1 +; RV32ZBB-NEXT: slt a4, a3, a1 ; RV32ZBB-NEXT: beqz a4, .LBB6_3 ; RV32ZBB-NEXT: j .LBB6_4 ; RV32ZBB-NEXT: .LBB6_2: -; RV32ZBB-NEXT: sltu a4, a3, a0 +; RV32ZBB-NEXT: sltu a4, a2, a0 ; RV32ZBB-NEXT: bnez a4, .LBB6_4 ; RV32ZBB-NEXT: .LBB6_3: -; RV32ZBB-NEXT: mv a3, a0 -; RV32ZBB-NEXT: mv a2, a1 +; RV32ZBB-NEXT: mv a2, a0 +; RV32ZBB-NEXT: mv a3, a1 ; RV32ZBB-NEXT: .LBB6_4: -; RV32ZBB-NEXT: neg a0, a3 -; RV32ZBB-NEXT: snez a1, a3 -; RV32ZBB-NEXT: neg a2, a2 -; RV32ZBB-NEXT: sub a1, a2, a1 +; RV32ZBB-NEXT: snez a1, a2 +; RV32ZBB-NEXT: neg a3, a3 +; RV32ZBB-NEXT: neg a0, a2 +; RV32ZBB-NEXT: sub a1, a3, a1 ; RV32ZBB-NEXT: ret ; ; RV64I-LABEL: expanded_neg_inv_abs64: @@ -387,48 +387,48 @@ define i64 @expanded_neg_inv_abs64_unsigned(i64 %x) { ; RV32I: # %bb.0: ; RV32I-NEXT: snez a2, a0 ; RV32I-NEXT: neg a3, a1 -; RV32I-NEXT: sub a2, a3, a2 -; RV32I-NEXT: neg a3, a0 -; RV32I-NEXT: beq a2, a1, .LBB7_2 +; RV32I-NEXT: sub a3, a3, a2 +; RV32I-NEXT: neg a2, a0 +; RV32I-NEXT: beq a3, a1, .LBB7_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu a4, a2, a1 +; RV32I-NEXT: sltu a4, a3, a1 ; RV32I-NEXT: beqz a4, .LBB7_3 ; RV32I-NEXT: j .LBB7_4 ; RV32I-NEXT: .LBB7_2: -; RV32I-NEXT: sltu a4, a3, a0 +; RV32I-NEXT: sltu a4, a2, a0 ; RV32I-NEXT: bnez a4, .LBB7_4 ; RV32I-NEXT: .LBB7_3: -; RV32I-NEXT: mv a3, a0 -; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: 
.LBB7_4: -; RV32I-NEXT: neg a0, a3 -; RV32I-NEXT: snez a1, a3 -; RV32I-NEXT: neg a2, a2 -; RV32I-NEXT: sub a1, a2, a1 +; RV32I-NEXT: snez a1, a2 +; RV32I-NEXT: neg a3, a3 +; RV32I-NEXT: neg a0, a2 +; RV32I-NEXT: sub a1, a3, a1 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: expanded_neg_inv_abs64_unsigned: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: snez a2, a0 ; RV32ZBB-NEXT: neg a3, a1 -; RV32ZBB-NEXT: sub a2, a3, a2 -; RV32ZBB-NEXT: neg a3, a0 -; RV32ZBB-NEXT: beq a2, a1, .LBB7_2 +; RV32ZBB-NEXT: sub a3, a3, a2 +; RV32ZBB-NEXT: neg a2, a0 +; RV32ZBB-NEXT: beq a3, a1, .LBB7_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu a4, a2, a1 +; RV32ZBB-NEXT: sltu a4, a3, a1 ; RV32ZBB-NEXT: beqz a4, .LBB7_3 ; RV32ZBB-NEXT: j .LBB7_4 ; RV32ZBB-NEXT: .LBB7_2: -; RV32ZBB-NEXT: sltu a4, a3, a0 +; RV32ZBB-NEXT: sltu a4, a2, a0 ; RV32ZBB-NEXT: bnez a4, .LBB7_4 ; RV32ZBB-NEXT: .LBB7_3: -; RV32ZBB-NEXT: mv a3, a0 -; RV32ZBB-NEXT: mv a2, a1 +; RV32ZBB-NEXT: mv a2, a0 +; RV32ZBB-NEXT: mv a3, a1 ; RV32ZBB-NEXT: .LBB7_4: -; RV32ZBB-NEXT: neg a0, a3 -; RV32ZBB-NEXT: snez a1, a3 -; RV32ZBB-NEXT: neg a2, a2 -; RV32ZBB-NEXT: sub a1, a2, a1 +; RV32ZBB-NEXT: snez a1, a2 +; RV32ZBB-NEXT: neg a3, a3 +; RV32ZBB-NEXT: neg a0, a2 +; RV32ZBB-NEXT: sub a1, a3, a1 ; RV32ZBB-NEXT: ret ; ; RV64I-LABEL: expanded_neg_inv_abs64_unsigned: diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/constbarrier-rv32.ll b/llvm/test/CodeGen/RISCV/GlobalISel/constbarrier-rv32.ll index b24ea9ec1561e..aa1c408a2b7ca 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/constbarrier-rv32.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/constbarrier-rv32.ll @@ -20,25 +20,24 @@ entry: define void @constant_fold_barrier_i128(ptr %p) { ; RV32-LABEL: constant_fold_barrier_i128: ; RV32: # %bb.0: # %entry -; RV32-NEXT: li a1, 1 -; RV32-NEXT: lw a2, 0(a0) -; RV32-NEXT: lw a3, 4(a0) +; RV32-NEXT: lw a1, 0(a0) +; RV32-NEXT: lw a2, 4(a0) +; RV32-NEXT: li a3, 1 +; RV32-NEXT: slli a3, a3, 11 ; RV32-NEXT: lw a4, 8(a0) ; RV32-NEXT: lw a5, 12(a0) -; RV32-NEXT: slli a1, 
a1, 11 -; RV32-NEXT: and a2, a2, a1 -; RV32-NEXT: add a2, a2, a1 -; RV32-NEXT: sltu a1, a2, a1 -; RV32-NEXT: mv a6, a1 -; RV32-NEXT: seqz a7, a1 -; RV32-NEXT: and a1, a7, a1 -; RV32-NEXT: mv a7, a1 -; RV32-NEXT: seqz a3, a1 -; RV32-NEXT: and a1, a3, a1 -; RV32-NEXT: sw a2, 0(a0) -; RV32-NEXT: sw a6, 4(a0) -; RV32-NEXT: sw a7, 8(a0) -; RV32-NEXT: sw a1, 12(a0) +; RV32-NEXT: and a1, a1, a3 +; RV32-NEXT: add a1, a1, a3 +; RV32-NEXT: sltu a3, a1, a3 +; RV32-NEXT: mv a7, a3 +; RV32-NEXT: seqz a6, a3 +; RV32-NEXT: and a3, a6, a3 +; RV32-NEXT: seqz a6, a3 +; RV32-NEXT: and a2, a6, a3 +; RV32-NEXT: sw a1, 0(a0) +; RV32-NEXT: sw a7, 4(a0) +; RV32-NEXT: sw a3, 8(a0) +; RV32-NEXT: sw a2, 12(a0) ; RV32-NEXT: ret entry: %x = load i128, ptr %p diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll b/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll index 225ceed9627b7..7985dfece07a1 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll @@ -22,11 +22,11 @@ define i32 @udiv_constant_no_add(i32 %a) nounwind { ; ; RV64IM-LABEL: udiv_constant_no_add: ; RV64IM: # %bb.0: -; RV64IM-NEXT: slli a0, a0, 32 ; RV64IM-NEXT: lui a1, 205 -; RV64IM-NEXT: srli a0, a0, 32 ; RV64IM-NEXT: addi a1, a1, -819 +; RV64IM-NEXT: slli a0, a0, 32 ; RV64IM-NEXT: slli a1, a1, 12 +; RV64IM-NEXT: srli a0, a0, 32 ; RV64IM-NEXT: addi a1, a1, -819 ; RV64IM-NEXT: mul a0, a0, a1 ; RV64IM-NEXT: srli a0, a0, 34 @@ -34,9 +34,9 @@ define i32 @udiv_constant_no_add(i32 %a) nounwind { ; ; RV64IMZB-LABEL: udiv_constant_no_add: ; RV64IMZB: # %bb.0: -; RV64IMZB-NEXT: zext.w a0, a0 ; RV64IMZB-NEXT: lui a1, 838861 ; RV64IMZB-NEXT: addi a1, a1, -819 +; RV64IMZB-NEXT: zext.w a0, a0 ; RV64IMZB-NEXT: zext.w a1, a1 ; RV64IMZB-NEXT: mul a0, a0, a1 ; RV64IMZB-NEXT: srli a0, a0, 34 @@ -92,37 +92,37 @@ define i64 @udiv64_constant_no_add(i64 %a) nounwind { ; RV32-LABEL: udiv64_constant_no_add: ; RV32: # %bb.0: ; RV32-NEXT: lui a2, 838861 -; RV32-NEXT: 
addi a4, a2, -819 +; RV32-NEXT: addi a3, a2, -819 ; RV32-NEXT: addi a2, a2, -820 -; RV32-NEXT: mul a5, a1, a4 -; RV32-NEXT: mul a6, a0, a2 -; RV32-NEXT: mulhu a7, a0, a4 -; RV32-NEXT: mul t1, a1, a2 -; RV32-NEXT: mulhu t2, a1, a4 -; RV32-NEXT: mulhu a0, a0, a2 +; RV32-NEXT: mul a4, a1, a3 +; RV32-NEXT: mul a5, a0, a2 +; RV32-NEXT: mul a7, a1, a2 +; RV32-NEXT: mulhu t0, a0, a3 +; RV32-NEXT: mulhu t1, a1, a3 +; RV32-NEXT: mulhu t2, a0, a2 ; RV32-NEXT: mulhu a1, a1, a2 -; RV32-NEXT: add a5, a5, a6 -; RV32-NEXT: mv t0, t1 -; RV32-NEXT: sltu a4, a5, a6 +; RV32-NEXT: add a4, a4, a5 +; RV32-NEXT: mv a2, a7 +; RV32-NEXT: sltu a5, a4, a5 +; RV32-NEXT: add a4, a4, t0 +; RV32-NEXT: sltu a7, a7, a7 +; RV32-NEXT: sltu a4, a4, t0 +; RV32-NEXT: sltiu t0, a2, 0 +; RV32-NEXT: add a7, a7, t0 +; RV32-NEXT: add a2, a2, t1 +; RV32-NEXT: add a4, a5, a4 +; RV32-NEXT: sltu a5, a2, t1 +; RV32-NEXT: add a2, a2, t2 +; RV32-NEXT: add a5, a7, a5 +; RV32-NEXT: sltu a7, a2, t2 ; RV32-NEXT: add a5, a5, a7 -; RV32-NEXT: sltu a6, t1, t1 -; RV32-NEXT: sltiu t1, t1, 0 -; RV32-NEXT: add t0, t0, t2 -; RV32-NEXT: sltu a2, a5, a7 -; RV32-NEXT: add a6, a6, t1 -; RV32-NEXT: sltu a5, t0, t2 -; RV32-NEXT: add t0, t0, a0 -; RV32-NEXT: add a2, a4, a2 -; RV32-NEXT: add a5, a6, a5 -; RV32-NEXT: sltu a0, t0, a0 -; RV32-NEXT: add a0, a5, a0 -; RV32-NEXT: add t0, t0, a2 -; RV32-NEXT: sltu a2, t0, a2 -; RV32-NEXT: srli a3, t0, 2 -; RV32-NEXT: add a0, a0, a2 -; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: add a2, a2, a4 +; RV32-NEXT: sltu a3, a2, a4 +; RV32-NEXT: add a3, a5, a3 +; RV32-NEXT: add a1, a1, a3 +; RV32-NEXT: srli a2, a2, 2 ; RV32-NEXT: slli a0, a1, 30 -; RV32-NEXT: or a0, a3, a0 +; RV32-NEXT: or a0, a2, a0 ; RV32-NEXT: srli a1, a1, 2 ; RV32-NEXT: ret ; @@ -150,46 +150,46 @@ define i64 @udiv64_constant_add(i64 %a) nounwind { ; RV32-NEXT: lui a3, 149797 ; RV32-NEXT: addi a2, a2, 1171 ; RV32-NEXT: addi a3, a3, -1756 -; RV32-NEXT: mul a5, a1, a2 -; RV32-NEXT: mul a6, a0, a3 -; RV32-NEXT: mulhu a7, a0, a2 -; 
RV32-NEXT: mulhu t2, a1, a3 -; RV32-NEXT: mv t1, t2 -; RV32-NEXT: mul t2, a1, a3 -; RV32-NEXT: mulhu a2, a1, a2 -; RV32-NEXT: mulhu a3, a0, a3 -; RV32-NEXT: add a5, a5, a6 -; RV32-NEXT: mv t0, t2 -; RV32-NEXT: sltu a6, a5, a6 +; RV32-NEXT: mul a4, a1, a2 +; RV32-NEXT: mul a5, a0, a3 +; RV32-NEXT: mul a7, a1, a3 +; RV32-NEXT: mulhu t0, a0, a2 +; RV32-NEXT: mulhu t1, a1, a2 +; RV32-NEXT: mulhu t2, a0, a3 +; RV32-NEXT: add a4, a4, a5 +; RV32-NEXT: mv t3, a7 +; RV32-NEXT: sltu a5, a4, a5 +; RV32-NEXT: add a4, a4, t0 +; RV32-NEXT: sltu a7, a7, a7 +; RV32-NEXT: sltu a4, a4, t0 +; RV32-NEXT: sltiu t0, t3, 0 +; RV32-NEXT: add a7, a7, t0 +; RV32-NEXT: mulhu a3, a1, a3 +; RV32-NEXT: add t3, t3, t1 +; RV32-NEXT: add a4, a5, a4 +; RV32-NEXT: sltu a5, t3, t1 +; RV32-NEXT: add a5, a7, a5 +; RV32-NEXT: add t3, t3, t2 +; RV32-NEXT: sltu a7, t3, t2 +; RV32-NEXT: add t3, t3, a4 ; RV32-NEXT: add a5, a5, a7 -; RV32-NEXT: sltu t2, t2, t2 -; RV32-NEXT: sltu a5, a5, a7 -; RV32-NEXT: sltiu a7, t0, 0 -; RV32-NEXT: add t0, t0, a2 -; RV32-NEXT: add a7, t2, a7 -; RV32-NEXT: sltu a2, t0, a2 -; RV32-NEXT: add t0, t0, a3 -; RV32-NEXT: add a5, a6, a5 -; RV32-NEXT: add a2, a7, a2 -; RV32-NEXT: sltu a3, t0, a3 -; RV32-NEXT: add a2, a2, a3 -; RV32-NEXT: add t0, t0, a5 -; RV32-NEXT: sltu a3, t0, a5 -; RV32-NEXT: sub a5, a0, t0 -; RV32-NEXT: sltu a0, a0, t0 -; RV32-NEXT: add a2, a2, a3 -; RV32-NEXT: sub a1, a1, a0 -; RV32-NEXT: srli a5, a5, 1 -; RV32-NEXT: add a2, t1, a2 +; RV32-NEXT: sltu a4, t3, a4 +; RV32-NEXT: add a4, a5, a4 +; RV32-NEXT: mv a2, a3 +; RV32-NEXT: sltu a3, a0, t3 +; RV32-NEXT: add a2, a2, a4 +; RV32-NEXT: sub a1, a1, a3 +; RV32-NEXT: sub a0, a0, t3 ; RV32-NEXT: sub a1, a1, a2 -; RV32-NEXT: slli a0, a1, 31 +; RV32-NEXT: srli a0, a0, 1 +; RV32-NEXT: slli a3, a1, 31 +; RV32-NEXT: or a0, a0, a3 ; RV32-NEXT: srli a1, a1, 1 -; RV32-NEXT: or a0, a5, a0 +; RV32-NEXT: add a0, a0, t3 +; RV32-NEXT: sltu a3, a0, t3 ; RV32-NEXT: add a1, a1, a2 -; RV32-NEXT: add a0, a0, t0 -; RV32-NEXT: sltu a2, 
a0, t0 +; RV32-NEXT: add a1, a1, a3 ; RV32-NEXT: srli a0, a0, 2 -; RV32-NEXT: add a1, a1, a2 ; RV32-NEXT: slli a2, a1, 30 ; RV32-NEXT: or a0, a0, a2 ; RV32-NEXT: srli a1, a1, 2 @@ -272,8 +272,8 @@ define i16 @udiv16_constant_no_add(i16 %a) nounwind { ; ; RV32IMZB-LABEL: udiv16_constant_no_add: ; RV32IMZB: # %bb.0: -; RV32IMZB-NEXT: zext.h a0, a0 ; RV32IMZB-NEXT: lui a1, 13 +; RV32IMZB-NEXT: zext.h a0, a0 ; RV32IMZB-NEXT: addi a1, a1, -819 ; RV32IMZB-NEXT: mul a0, a0, a1 ; RV32IMZB-NEXT: srli a0, a0, 18 @@ -291,8 +291,8 @@ define i16 @udiv16_constant_no_add(i16 %a) nounwind { ; ; RV64IMZB-LABEL: udiv16_constant_no_add: ; RV64IMZB: # %bb.0: -; RV64IMZB-NEXT: zext.h a0, a0 ; RV64IMZB-NEXT: lui a1, 13 +; RV64IMZB-NEXT: zext.h a0, a0 ; RV64IMZB-NEXT: addi a1, a1, -819 ; RV64IMZB-NEXT: mul a0, a0, a1 ; RV64IMZB-NEXT: srli a0, a0, 18 @@ -304,17 +304,17 @@ define i16 @udiv16_constant_no_add(i16 %a) nounwind { define i16 @udiv16_constant_add(i16 %a) nounwind { ; RV32IM-LABEL: udiv16_constant_add: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lui a1, 2 -; RV32IM-NEXT: lui a2, 16 -; RV32IM-NEXT: addi a1, a1, 1171 -; RV32IM-NEXT: addi a2, a2, -1 -; RV32IM-NEXT: and a3, a0, a2 -; RV32IM-NEXT: mul a1, a3, a1 -; RV32IM-NEXT: srli a1, a1, 16 -; RV32IM-NEXT: sub a0, a0, a1 -; RV32IM-NEXT: and a0, a0, a2 +; RV32IM-NEXT: lui a1, 16 +; RV32IM-NEXT: lui a2, 2 +; RV32IM-NEXT: addi a1, a1, -1 +; RV32IM-NEXT: addi a2, a2, 1171 +; RV32IM-NEXT: and a3, a0, a1 +; RV32IM-NEXT: mul a2, a3, a2 +; RV32IM-NEXT: srli a2, a2, 16 +; RV32IM-NEXT: sub a0, a0, a2 +; RV32IM-NEXT: and a0, a0, a1 ; RV32IM-NEXT: srli a0, a0, 1 -; RV32IM-NEXT: add a0, a0, a1 +; RV32IM-NEXT: add a0, a0, a2 ; RV32IM-NEXT: srli a0, a0, 2 ; RV32IM-NEXT: ret ; @@ -334,17 +334,17 @@ define i16 @udiv16_constant_add(i16 %a) nounwind { ; ; RV64IM-LABEL: udiv16_constant_add: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lui a1, 2 -; RV64IM-NEXT: lui a2, 16 -; RV64IM-NEXT: addi a1, a1, 1171 -; RV64IM-NEXT: addi a2, a2, -1 -; RV64IM-NEXT: and a3, a0, a2 -; 
RV64IM-NEXT: mul a1, a3, a1 -; RV64IM-NEXT: srli a1, a1, 16 -; RV64IM-NEXT: sub a0, a0, a1 -; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: lui a1, 16 +; RV64IM-NEXT: lui a2, 2 +; RV64IM-NEXT: addi a1, a1, -1 +; RV64IM-NEXT: addi a2, a2, 1171 +; RV64IM-NEXT: and a3, a0, a1 +; RV64IM-NEXT: mul a2, a3, a2 +; RV64IM-NEXT: srli a2, a2, 16 +; RV64IM-NEXT: sub a0, a0, a2 +; RV64IM-NEXT: and a0, a0, a1 ; RV64IM-NEXT: srli a0, a0, 1 -; RV64IM-NEXT: add a0, a0, a1 +; RV64IM-NEXT: add a0, a0, a2 ; RV64IM-NEXT: srli a0, a0, 2 ; RV64IM-NEXT: ret ; @@ -590,8 +590,8 @@ define i64 @sdiv64_constant_sub_srai(i64 %a) nounwind { define i8 @sdiv8_constant_no_srai(i8 %a) nounwind { ; RV32IM-LABEL: sdiv8_constant_no_srai: ; RV32IM: # %bb.0: -; RV32IM-NEXT: li a1, 86 ; RV32IM-NEXT: slli a0, a0, 24 +; RV32IM-NEXT: li a1, 86 ; RV32IM-NEXT: srai a0, a0, 24 ; RV32IM-NEXT: mul a0, a0, a1 ; RV32IM-NEXT: slli a0, a0, 16 @@ -615,8 +615,8 @@ define i8 @sdiv8_constant_no_srai(i8 %a) nounwind { ; ; RV64IM-LABEL: sdiv8_constant_no_srai: ; RV64IM: # %bb.0: -; RV64IM-NEXT: li a1, 86 ; RV64IM-NEXT: slli a0, a0, 56 +; RV64IM-NEXT: li a1, 86 ; RV64IM-NEXT: srai a0, a0, 56 ; RV64IM-NEXT: mul a0, a0, a1 ; RV64IM-NEXT: slli a0, a0, 48 @@ -644,8 +644,8 @@ define i8 @sdiv8_constant_no_srai(i8 %a) nounwind { define i8 @sdiv8_constant_srai(i8 %a) nounwind { ; RV32IM-LABEL: sdiv8_constant_srai: ; RV32IM: # %bb.0: -; RV32IM-NEXT: li a1, 103 ; RV32IM-NEXT: slli a0, a0, 24 +; RV32IM-NEXT: li a1, 103 ; RV32IM-NEXT: srai a0, a0, 24 ; RV32IM-NEXT: mul a0, a0, a1 ; RV32IM-NEXT: slli a0, a0, 16 @@ -669,8 +669,8 @@ define i8 @sdiv8_constant_srai(i8 %a) nounwind { ; ; RV64IM-LABEL: sdiv8_constant_srai: ; RV64IM: # %bb.0: -; RV64IM-NEXT: li a1, 103 ; RV64IM-NEXT: slli a0, a0, 56 +; RV64IM-NEXT: li a1, 103 ; RV64IM-NEXT: srai a0, a0, 56 ; RV64IM-NEXT: mul a0, a0, a1 ; RV64IM-NEXT: slli a0, a0, 48 @@ -698,10 +698,10 @@ define i8 @sdiv8_constant_srai(i8 %a) nounwind { define i8 @sdiv8_constant_add_srai(i8 %a) nounwind { ; 
RV32IM-LABEL: sdiv8_constant_add_srai: ; RV32IM: # %bb.0: -; RV32IM-NEXT: li a1, -109 -; RV32IM-NEXT: slli a2, a0, 24 -; RV32IM-NEXT: srai a2, a2, 24 -; RV32IM-NEXT: mul a1, a2, a1 +; RV32IM-NEXT: slli a1, a0, 24 +; RV32IM-NEXT: li a2, -109 +; RV32IM-NEXT: srai a1, a1, 24 +; RV32IM-NEXT: mul a1, a1, a2 ; RV32IM-NEXT: slli a1, a1, 16 ; RV32IM-NEXT: srai a1, a1, 24 ; RV32IM-NEXT: add a0, a1, a0 @@ -729,10 +729,10 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind { ; ; RV64IM-LABEL: sdiv8_constant_add_srai: ; RV64IM: # %bb.0: -; RV64IM-NEXT: li a1, -109 -; RV64IM-NEXT: slli a2, a0, 56 -; RV64IM-NEXT: srai a2, a2, 56 -; RV64IM-NEXT: mul a1, a2, a1 +; RV64IM-NEXT: slli a1, a0, 56 +; RV64IM-NEXT: li a2, -109 +; RV64IM-NEXT: srai a1, a1, 56 +; RV64IM-NEXT: mul a1, a1, a2 ; RV64IM-NEXT: slli a1, a1, 48 ; RV64IM-NEXT: srai a1, a1, 56 ; RV64IM-NEXT: add a0, a1, a0 @@ -764,10 +764,10 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind { define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind { ; RV32IM-LABEL: sdiv8_constant_sub_srai: ; RV32IM: # %bb.0: -; RV32IM-NEXT: li a1, 109 -; RV32IM-NEXT: slli a2, a0, 24 -; RV32IM-NEXT: srai a2, a2, 24 -; RV32IM-NEXT: mul a1, a2, a1 +; RV32IM-NEXT: slli a1, a0, 24 +; RV32IM-NEXT: li a2, 109 +; RV32IM-NEXT: srai a1, a1, 24 +; RV32IM-NEXT: mul a1, a1, a2 ; RV32IM-NEXT: slli a1, a1, 16 ; RV32IM-NEXT: srai a1, a1, 24 ; RV32IM-NEXT: sub a1, a1, a0 @@ -795,10 +795,10 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind { ; ; RV64IM-LABEL: sdiv8_constant_sub_srai: ; RV64IM: # %bb.0: -; RV64IM-NEXT: li a1, 109 -; RV64IM-NEXT: slli a2, a0, 56 -; RV64IM-NEXT: srai a2, a2, 56 -; RV64IM-NEXT: mul a1, a2, a1 +; RV64IM-NEXT: slli a1, a0, 56 +; RV64IM-NEXT: li a2, 109 +; RV64IM-NEXT: srai a1, a1, 56 +; RV64IM-NEXT: mul a1, a1, a2 ; RV64IM-NEXT: slli a1, a1, 48 ; RV64IM-NEXT: srai a1, a1, 56 ; RV64IM-NEXT: sub a1, a1, a0 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll index 
4246aa545dd0e..ab2009da49a04 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll @@ -239,8 +239,8 @@ define double @fsgnjn_d(double %a, double %b) nounwind { ; RV32I-LABEL: fsgnjn_d: ; RV32I: # %bb.0: ; RV32I-NEXT: lui a2, 524288 -; RV32I-NEXT: slli a1, a1, 1 ; RV32I-NEXT: xor a3, a3, a2 +; RV32I-NEXT: slli a1, a1, 1 ; RV32I-NEXT: srli a1, a1, 1 ; RV32I-NEXT: and a2, a3, a2 ; RV32I-NEXT: or a1, a1, a2 @@ -249,9 +249,9 @@ define double @fsgnjn_d(double %a, double %b) nounwind { ; RV64I-LABEL: fsgnjn_d: ; RV64I: # %bb.0: ; RV64I-NEXT: li a2, -1 -; RV64I-NEXT: slli a0, a0, 1 ; RV64I-NEXT: slli a2, a2, 63 ; RV64I-NEXT: xor a1, a1, a2 +; RV64I-NEXT: slli a0, a0, 1 ; RV64I-NEXT: srli a0, a0, 1 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: or a0, a0, a1 @@ -1094,8 +1094,8 @@ define double @fnmadd_d_contract(double %a, double %b, double %c) nounwind { ; RV32IFD-NEXT: fcvt.d.w fa5, zero ; RV32IFD-NEXT: fadd.d fa4, fa0, fa5 ; RV32IFD-NEXT: fadd.d fa3, fa1, fa5 -; RV32IFD-NEXT: fadd.d fa5, fa2, fa5 ; RV32IFD-NEXT: fmul.d fa4, fa4, fa3 +; RV32IFD-NEXT: fadd.d fa5, fa2, fa5 ; RV32IFD-NEXT: fneg.d fa4, fa4 ; RV32IFD-NEXT: fsub.d fa0, fa4, fa5 ; RV32IFD-NEXT: ret @@ -1105,8 +1105,8 @@ define double @fnmadd_d_contract(double %a, double %b, double %c) nounwind { ; RV64IFD-NEXT: fmv.d.x fa5, zero ; RV64IFD-NEXT: fadd.d fa4, fa0, fa5 ; RV64IFD-NEXT: fadd.d fa3, fa1, fa5 -; RV64IFD-NEXT: fadd.d fa5, fa2, fa5 ; RV64IFD-NEXT: fmul.d fa4, fa4, fa3 +; RV64IFD-NEXT: fadd.d fa5, fa2, fa5 ; RV64IFD-NEXT: fneg.d fa4, fa4 ; RV64IFD-NEXT: fsub.d fa0, fa4, fa5 ; RV64IFD-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-intrinsics.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-intrinsics.ll index 4b0acda839ad6..930a648a16a72 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/double-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/double-intrinsics.ll @@ -1036,12 +1036,12 @@ define i1 @isnan_d_fpclass(double %x) { 
; ; RV32I-LABEL: isnan_d_fpclass: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a2, 524032 -; RV32I-NEXT: slli a1, a1, 1 -; RV32I-NEXT: srli a1, a1, 1 -; RV32I-NEXT: beq a1, a2, .LBB25_2 +; RV32I-NEXT: slli a2, a1, 1 +; RV32I-NEXT: lui a1, 524032 +; RV32I-NEXT: srli a2, a2, 1 +; RV32I-NEXT: beq a2, a1, .LBB25_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu a0, a2, a1 +; RV32I-NEXT: sltu a0, a1, a2 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB25_2: ; RV32I-NEXT: snez a0, a0 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll b/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll index 3222849641baf..9346372529e53 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll @@ -237,8 +237,8 @@ define float @fsgnjn_s(float %a, float %b) nounwind { ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: lui a1, 524288 -; RV32I-NEXT: slli s0, s0, 1 ; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: slli s0, s0, 1 ; RV32I-NEXT: srli s0, s0, 1 ; RV32I-NEXT: and a0, a0, a1 ; RV32I-NEXT: or a0, s0, a0 @@ -255,8 +255,8 @@ define float @fsgnjn_s(float %a, float %b) nounwind { ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: lui a1, 524288 -; RV64I-NEXT: slli s0, s0, 33 ; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: slli s0, s0, 33 ; RV64I-NEXT: srli s0, s0, 33 ; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: or a0, s0, a0 @@ -965,8 +965,8 @@ define float @fnmadd_s_contract(float %a, float %b, float %c) nounwind { ; CHECKIF-NEXT: fmv.w.x fa5, zero ; CHECKIF-NEXT: fadd.s fa4, fa0, fa5 ; CHECKIF-NEXT: fadd.s fa3, fa1, fa5 -; CHECKIF-NEXT: fadd.s fa5, fa2, fa5 ; CHECKIF-NEXT: fmul.s fa4, fa4, fa3 +; CHECKIF-NEXT: fadd.s fa5, fa2, fa5 ; CHECKIF-NEXT: fneg.s fa4, fa4 ; CHECKIF-NEXT: fsub.s fa0, fa4, fa5 ; CHECKIF-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll b/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll index 01d9ceb0a0860..c6703d85b9e78 100644 --- 
a/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll @@ -1194,23 +1194,23 @@ define i1 @fpclass(float %x) { ; RV32I-NEXT: lui a1, 522240 ; RV32I-NEXT: slli a2, a0, 1 ; RV32I-NEXT: lui a3, 2048 -; RV32I-NEXT: lui a4, 1046528 -; RV32I-NEXT: srli a5, a2, 1 -; RV32I-NEXT: addi a3, a3, -1 -; RV32I-NEXT: xor a0, a0, a5 -; RV32I-NEXT: xor a6, a5, a1 -; RV32I-NEXT: sltu a1, a1, a5 -; RV32I-NEXT: add a4, a5, a4 -; RV32I-NEXT: addi a5, a5, -1 -; RV32I-NEXT: sltu a3, a5, a3 -; RV32I-NEXT: lui a5, 520192 -; RV32I-NEXT: sltu a4, a4, a5 +; RV32I-NEXT: srli a4, a2, 1 ; RV32I-NEXT: seqz a2, a2 +; RV32I-NEXT: xor a0, a0, a4 +; RV32I-NEXT: addi a5, a4, -1 ; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: addi a3, a3, -1 +; RV32I-NEXT: xor a6, a4, a1 +; RV32I-NEXT: sltu a3, a5, a3 ; RV32I-NEXT: seqz a5, a6 -; RV32I-NEXT: and a3, a3, a0 ; RV32I-NEXT: or a2, a2, a5 -; RV32I-NEXT: and a0, a4, a0 +; RV32I-NEXT: lui a5, 1046528 +; RV32I-NEXT: add a5, a4, a5 +; RV32I-NEXT: lui a6, 520192 +; RV32I-NEXT: sltu a5, a5, a6 +; RV32I-NEXT: and a3, a3, a0 +; RV32I-NEXT: sltu a1, a1, a4 +; RV32I-NEXT: and a0, a5, a0 ; RV32I-NEXT: or a2, a2, a3 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: or a0, a2, a0 @@ -1221,26 +1221,26 @@ define i1 @fpclass(float %x) { ; RV64I-NEXT: lui a1, 522240 ; RV64I-NEXT: slli a2, a0, 33 ; RV64I-NEXT: sext.w a0, a0 -; RV64I-NEXT: li a3, 1 -; RV64I-NEXT: lui a4, 2048 -; RV64I-NEXT: srli a5, a2, 33 -; RV64I-NEXT: xor a0, a0, a5 -; RV64I-NEXT: subw a3, a5, a3 -; RV64I-NEXT: xor a6, a5, a1 -; RV64I-NEXT: sltu a1, a1, a5 -; RV64I-NEXT: subw a5, a5, a4 -; RV64I-NEXT: addi a4, a4, -1 -; RV64I-NEXT: sltu a3, a3, a4 -; RV64I-NEXT: lui a4, 520192 +; RV64I-NEXT: srli a3, a2, 33 ; RV64I-NEXT: seqz a2, a2 +; RV64I-NEXT: xor a0, a0, a3 +; RV64I-NEXT: li a4, 1 ; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: lui a5, 2048 +; RV64I-NEXT: subw a4, a3, a4 +; RV64I-NEXT: addi a6, a5, -1 +; RV64I-NEXT: sltu a4, a4, a6 +; RV64I-NEXT: xor a6, a3, a1 
+; RV64I-NEXT: and a4, a4, a0 ; RV64I-NEXT: seqz a6, a6 -; RV64I-NEXT: sltu a4, a5, a4 -; RV64I-NEXT: and a3, a3, a0 ; RV64I-NEXT: or a2, a2, a6 +; RV64I-NEXT: sltu a1, a1, a3 +; RV64I-NEXT: subw a3, a3, a5 +; RV64I-NEXT: lui a5, 520192 ; RV64I-NEXT: or a1, a2, a1 -; RV64I-NEXT: or a1, a1, a3 -; RV64I-NEXT: and a0, a4, a0 +; RV64I-NEXT: sltu a2, a3, a5 +; RV64I-NEXT: or a1, a1, a4 +; RV64I-NEXT: and a0, a2, a0 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret %cmp = call i1 @llvm.is.fpclass.f32(float %x, i32 639) @@ -1271,16 +1271,16 @@ define i1 @isnan_fpclass(float %x) { ; ; RV32I-LABEL: isnan_fpclass: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a1, 522240 ; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: lui a1, 522240 ; RV32I-NEXT: srli a0, a0, 1 ; RV32I-NEXT: sltu a0, a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: isnan_fpclass: ; RV64I: # %bb.0: -; RV64I-NEXT: lui a1, 522240 ; RV64I-NEXT: slli a0, a0, 33 +; RV64I-NEXT: lui a1, 522240 ; RV64I-NEXT: srli a0, a0, 33 ; RV64I-NEXT: sltu a0, a1, a0 ; RV64I-NEXT: ret @@ -1357,8 +1357,8 @@ define i1 @issnan_fpclass(float %x) { ; RV32I: # %bb.0: ; RV32I-NEXT: lui a1, 522240 ; RV32I-NEXT: slli a0, a0, 1 -; RV32I-NEXT: lui a2, 523264 ; RV32I-NEXT: srli a0, a0, 1 +; RV32I-NEXT: lui a2, 523264 ; RV32I-NEXT: sltu a1, a1, a0 ; RV32I-NEXT: sltu a0, a0, a2 ; RV32I-NEXT: and a0, a1, a0 @@ -1368,8 +1368,8 @@ define i1 @issnan_fpclass(float %x) { ; RV64I: # %bb.0: ; RV64I-NEXT: lui a1, 522240 ; RV64I-NEXT: slli a0, a0, 33 -; RV64I-NEXT: lui a2, 523264 ; RV64I-NEXT: srli a0, a0, 33 +; RV64I-NEXT: lui a2, 523264 ; RV64I-NEXT: sltu a1, a1, a0 ; RV64I-NEXT: sltu a0, a0, a2 ; RV64I-NEXT: and a0, a1, a0 @@ -1402,8 +1402,8 @@ define i1 @isinf_fpclass(float %x) { ; ; RV32I-LABEL: isinf_fpclass: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a1, 522240 ; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: lui a1, 522240 ; RV32I-NEXT: srli a0, a0, 1 ; RV32I-NEXT: xor a0, a0, a1 ; RV32I-NEXT: seqz a0, a0 @@ -1411,8 +1411,8 @@ define i1 @isinf_fpclass(float %x) { ; ; RV64I-LABEL: 
isinf_fpclass: ; RV64I: # %bb.0: -; RV64I-NEXT: lui a1, 522240 ; RV64I-NEXT: slli a0, a0, 33 +; RV64I-NEXT: lui a1, 522240 ; RV64I-NEXT: srli a0, a0, 33 ; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: seqz a0, a0 @@ -1525,16 +1525,16 @@ define i1 @isfinite_fpclass(float %x) { ; ; RV32I-LABEL: isfinite_fpclass: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a1, 522240 ; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: lui a1, 522240 ; RV32I-NEXT: srli a0, a0, 1 ; RV32I-NEXT: sltu a0, a0, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: isfinite_fpclass: ; RV64I: # %bb.0: -; RV64I-NEXT: lui a1, 522240 ; RV64I-NEXT: slli a0, a0, 33 +; RV64I-NEXT: lui a1, 522240 ; RV64I-NEXT: srli a0, a0, 33 ; RV64I-NEXT: sltu a0, a0, a1 ; RV64I-NEXT: ret @@ -1604,24 +1604,24 @@ define i1 @isnegfinite_fpclass(float %x) { ; ; RV32I-LABEL: isnegfinite_fpclass: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a1, 522240 -; RV32I-NEXT: slli a2, a0, 1 -; RV32I-NEXT: srli a2, a2, 1 -; RV32I-NEXT: xor a0, a0, a2 +; RV32I-NEXT: slli a1, a0, 1 +; RV32I-NEXT: srli a1, a1, 1 +; RV32I-NEXT: lui a2, 522240 +; RV32I-NEXT: xor a0, a0, a1 ; RV32I-NEXT: snez a0, a0 -; RV32I-NEXT: sltu a1, a2, a1 +; RV32I-NEXT: sltu a1, a1, a2 ; RV32I-NEXT: and a0, a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: isnegfinite_fpclass: ; RV64I: # %bb.0: -; RV64I-NEXT: lui a1, 522240 -; RV64I-NEXT: slli a2, a0, 33 +; RV64I-NEXT: slli a1, a0, 33 +; RV64I-NEXT: srli a1, a1, 33 ; RV64I-NEXT: sext.w a0, a0 -; RV64I-NEXT: srli a2, a2, 33 -; RV64I-NEXT: xor a0, a0, a2 +; RV64I-NEXT: lui a2, 522240 +; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: snez a0, a0 -; RV64I-NEXT: sltu a1, a2, a1 +; RV64I-NEXT: sltu a1, a1, a2 ; RV64I-NEXT: and a0, a1, a0 ; RV64I-NEXT: ret %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 56) ; 0x38 = "-finite" @@ -1652,8 +1652,8 @@ define i1 @isnotfinite_fpclass(float %x) { ; ; RV32I-LABEL: isnotfinite_fpclass: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a1, 522240 ; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: lui a1, 522240 ; RV32I-NEXT: srli a0, a0, 1 ; RV32I-NEXT: 
xor a2, a0, a1 ; RV32I-NEXT: seqz a2, a2 @@ -1663,8 +1663,8 @@ define i1 @isnotfinite_fpclass(float %x) { ; ; RV64I-LABEL: isnotfinite_fpclass: ; RV64I: # %bb.0: -; RV64I-NEXT: lui a1, 522240 ; RV64I-NEXT: slli a0, a0, 33 +; RV64I-NEXT: lui a1, 522240 ; RV64I-NEXT: srli a0, a0, 33 ; RV64I-NEXT: xor a2, a0, a1 ; RV64I-NEXT: seqz a2, a2 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/iabs.ll b/llvm/test/CodeGen/RISCV/GlobalISel/iabs.ll index 31a78d4f72ceb..860023a6684cb 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/iabs.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/iabs.ll @@ -117,8 +117,8 @@ define i64 @abs64(i64 %x) { ; RV32I: # %bb.0: ; RV32I-NEXT: srai a2, a1, 31 ; RV32I-NEXT: add a0, a0, a2 -; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: sltu a3, a0, a2 +; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: add a1, a1, a3 ; RV32I-NEXT: xor a0, a0, a2 ; RV32I-NEXT: xor a1, a1, a2 @@ -128,8 +128,8 @@ define i64 @abs64(i64 %x) { ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: srai a2, a1, 31 ; RV32ZBB-NEXT: add a0, a0, a2 -; RV32ZBB-NEXT: add a1, a1, a2 ; RV32ZBB-NEXT: sltu a3, a0, a2 +; RV32ZBB-NEXT: add a1, a1, a2 ; RV32ZBB-NEXT: add a1, a1, a3 ; RV32ZBB-NEXT: xor a0, a0, a2 ; RV32ZBB-NEXT: xor a1, a1, a2 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rotl-rotr.ll index 46d1661983c6a..ae06a9400529e 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/rotl-rotr.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rotl-rotr.ll @@ -120,17 +120,17 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: rotl_64: ; RV32I: # %bb.0: ; RV32I-NEXT: andi a6, a2, 63 -; RV32I-NEXT: li a4, 32 -; RV32I-NEXT: bltu a6, a4, .LBB2_2 +; RV32I-NEXT: li a3, 32 +; RV32I-NEXT: bltu a6, a3, .LBB2_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: sll a7, a0, a6 ; RV32I-NEXT: j .LBB2_3 ; RV32I-NEXT: .LBB2_2: -; RV32I-NEXT: sll a3, a0, a2 -; RV32I-NEXT: neg a5, a6 -; RV32I-NEXT: srl a5, a0, a5 +; RV32I-NEXT: neg a4, a6 +; RV32I-NEXT: 
srl a5, a0, a4 ; RV32I-NEXT: sll a7, a1, a2 +; RV32I-NEXT: sll a4, a0, a2 ; RV32I-NEXT: or a7, a5, a7 ; RV32I-NEXT: .LBB2_3: ; RV32I-NEXT: neg a5, a2 @@ -140,28 +140,28 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind { ; RV32I-NEXT: mv a2, a7 ; RV32I-NEXT: .LBB2_5: ; RV32I-NEXT: andi a6, a5, 63 -; RV32I-NEXT: bltu a6, a4, .LBB2_7 +; RV32I-NEXT: bltu a6, a3, .LBB2_7 ; RV32I-NEXT: # %bb.6: ; RV32I-NEXT: srl a7, a1, a6 ; RV32I-NEXT: bnez a6, .LBB2_8 ; RV32I-NEXT: j .LBB2_9 ; RV32I-NEXT: .LBB2_7: -; RV32I-NEXT: srl a7, a0, a5 -; RV32I-NEXT: neg t0, a6 -; RV32I-NEXT: sll t0, a1, t0 -; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: neg a7, a6 +; RV32I-NEXT: srl t0, a0, a5 +; RV32I-NEXT: sll a7, a1, a7 +; RV32I-NEXT: or a7, t0, a7 ; RV32I-NEXT: beqz a6, .LBB2_9 ; RV32I-NEXT: .LBB2_8: ; RV32I-NEXT: mv a0, a7 ; RV32I-NEXT: .LBB2_9: -; RV32I-NEXT: bltu a6, a4, .LBB2_11 +; RV32I-NEXT: bltu a6, a3, .LBB2_11 ; RV32I-NEXT: # %bb.10: ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: j .LBB2_12 ; RV32I-NEXT: .LBB2_11: ; RV32I-NEXT: srl a1, a1, a5 ; RV32I-NEXT: .LBB2_12: -; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: or a0, a4, a0 ; RV32I-NEXT: or a1, a2, a1 ; RV32I-NEXT: ret ; @@ -176,17 +176,17 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind { ; RV32ZBB-LABEL: rotl_64: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: andi a6, a2, 63 -; RV32ZBB-NEXT: li a4, 32 -; RV32ZBB-NEXT: bltu a6, a4, .LBB2_2 +; RV32ZBB-NEXT: li a3, 32 +; RV32ZBB-NEXT: bltu a6, a3, .LBB2_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: li a3, 0 +; RV32ZBB-NEXT: li a4, 0 ; RV32ZBB-NEXT: sll a7, a0, a6 ; RV32ZBB-NEXT: j .LBB2_3 ; RV32ZBB-NEXT: .LBB2_2: -; RV32ZBB-NEXT: sll a3, a0, a2 -; RV32ZBB-NEXT: neg a5, a6 -; RV32ZBB-NEXT: srl a5, a0, a5 +; RV32ZBB-NEXT: neg a4, a6 +; RV32ZBB-NEXT: srl a5, a0, a4 ; RV32ZBB-NEXT: sll a7, a1, a2 +; RV32ZBB-NEXT: sll a4, a0, a2 ; RV32ZBB-NEXT: or a7, a5, a7 ; RV32ZBB-NEXT: .LBB2_3: ; RV32ZBB-NEXT: neg a5, a2 @@ -196,28 +196,28 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind { ; RV32ZBB-NEXT: mv a2, a7 ; 
RV32ZBB-NEXT: .LBB2_5: ; RV32ZBB-NEXT: andi a6, a5, 63 -; RV32ZBB-NEXT: bltu a6, a4, .LBB2_7 +; RV32ZBB-NEXT: bltu a6, a3, .LBB2_7 ; RV32ZBB-NEXT: # %bb.6: ; RV32ZBB-NEXT: srl a7, a1, a6 ; RV32ZBB-NEXT: bnez a6, .LBB2_8 ; RV32ZBB-NEXT: j .LBB2_9 ; RV32ZBB-NEXT: .LBB2_7: -; RV32ZBB-NEXT: srl a7, a0, a5 -; RV32ZBB-NEXT: neg t0, a6 -; RV32ZBB-NEXT: sll t0, a1, t0 -; RV32ZBB-NEXT: or a7, a7, t0 +; RV32ZBB-NEXT: neg a7, a6 +; RV32ZBB-NEXT: srl t0, a0, a5 +; RV32ZBB-NEXT: sll a7, a1, a7 +; RV32ZBB-NEXT: or a7, t0, a7 ; RV32ZBB-NEXT: beqz a6, .LBB2_9 ; RV32ZBB-NEXT: .LBB2_8: ; RV32ZBB-NEXT: mv a0, a7 ; RV32ZBB-NEXT: .LBB2_9: -; RV32ZBB-NEXT: bltu a6, a4, .LBB2_11 +; RV32ZBB-NEXT: bltu a6, a3, .LBB2_11 ; RV32ZBB-NEXT: # %bb.10: ; RV32ZBB-NEXT: li a1, 0 ; RV32ZBB-NEXT: j .LBB2_12 ; RV32ZBB-NEXT: .LBB2_11: ; RV32ZBB-NEXT: srl a1, a1, a5 ; RV32ZBB-NEXT: .LBB2_12: -; RV32ZBB-NEXT: or a0, a3, a0 +; RV32ZBB-NEXT: or a0, a4, a0 ; RV32ZBB-NEXT: or a1, a2, a1 ; RV32ZBB-NEXT: ret ; @@ -229,17 +229,17 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind { ; RV32XTHEADBB-LABEL: rotl_64: ; RV32XTHEADBB: # %bb.0: ; RV32XTHEADBB-NEXT: andi a6, a2, 63 -; RV32XTHEADBB-NEXT: li a4, 32 -; RV32XTHEADBB-NEXT: bltu a6, a4, .LBB2_2 +; RV32XTHEADBB-NEXT: li a3, 32 +; RV32XTHEADBB-NEXT: bltu a6, a3, .LBB2_2 ; RV32XTHEADBB-NEXT: # %bb.1: -; RV32XTHEADBB-NEXT: li a3, 0 +; RV32XTHEADBB-NEXT: li a4, 0 ; RV32XTHEADBB-NEXT: sll a7, a0, a6 ; RV32XTHEADBB-NEXT: j .LBB2_3 ; RV32XTHEADBB-NEXT: .LBB2_2: -; RV32XTHEADBB-NEXT: sll a3, a0, a2 -; RV32XTHEADBB-NEXT: neg a5, a6 -; RV32XTHEADBB-NEXT: srl a5, a0, a5 +; RV32XTHEADBB-NEXT: neg a4, a6 +; RV32XTHEADBB-NEXT: srl a5, a0, a4 ; RV32XTHEADBB-NEXT: sll a7, a1, a2 +; RV32XTHEADBB-NEXT: sll a4, a0, a2 ; RV32XTHEADBB-NEXT: or a7, a5, a7 ; RV32XTHEADBB-NEXT: .LBB2_3: ; RV32XTHEADBB-NEXT: neg a5, a2 @@ -249,28 +249,28 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind { ; RV32XTHEADBB-NEXT: mv a2, a7 ; RV32XTHEADBB-NEXT: .LBB2_5: ; RV32XTHEADBB-NEXT: andi a6, a5, 63 -; 
RV32XTHEADBB-NEXT: bltu a6, a4, .LBB2_7 +; RV32XTHEADBB-NEXT: bltu a6, a3, .LBB2_7 ; RV32XTHEADBB-NEXT: # %bb.6: ; RV32XTHEADBB-NEXT: srl a7, a1, a6 ; RV32XTHEADBB-NEXT: bnez a6, .LBB2_8 ; RV32XTHEADBB-NEXT: j .LBB2_9 ; RV32XTHEADBB-NEXT: .LBB2_7: -; RV32XTHEADBB-NEXT: srl a7, a0, a5 -; RV32XTHEADBB-NEXT: neg t0, a6 -; RV32XTHEADBB-NEXT: sll t0, a1, t0 -; RV32XTHEADBB-NEXT: or a7, a7, t0 +; RV32XTHEADBB-NEXT: neg a7, a6 +; RV32XTHEADBB-NEXT: srl t0, a0, a5 +; RV32XTHEADBB-NEXT: sll a7, a1, a7 +; RV32XTHEADBB-NEXT: or a7, t0, a7 ; RV32XTHEADBB-NEXT: beqz a6, .LBB2_9 ; RV32XTHEADBB-NEXT: .LBB2_8: ; RV32XTHEADBB-NEXT: mv a0, a7 ; RV32XTHEADBB-NEXT: .LBB2_9: -; RV32XTHEADBB-NEXT: bltu a6, a4, .LBB2_11 +; RV32XTHEADBB-NEXT: bltu a6, a3, .LBB2_11 ; RV32XTHEADBB-NEXT: # %bb.10: ; RV32XTHEADBB-NEXT: li a1, 0 ; RV32XTHEADBB-NEXT: j .LBB2_12 ; RV32XTHEADBB-NEXT: .LBB2_11: ; RV32XTHEADBB-NEXT: srl a1, a1, a5 ; RV32XTHEADBB-NEXT: .LBB2_12: -; RV32XTHEADBB-NEXT: or a0, a3, a0 +; RV32XTHEADBB-NEXT: or a0, a4, a0 ; RV32XTHEADBB-NEXT: or a1, a2, a1 ; RV32XTHEADBB-NEXT: ret ; @@ -300,10 +300,10 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind { ; RV32I-NEXT: bnez a5, .LBB3_3 ; RV32I-NEXT: j .LBB3_4 ; RV32I-NEXT: .LBB3_2: -; RV32I-NEXT: srl a3, a0, a2 -; RV32I-NEXT: neg a6, a5 -; RV32I-NEXT: sll a6, a1, a6 -; RV32I-NEXT: or a6, a3, a6 +; RV32I-NEXT: neg a3, a5 +; RV32I-NEXT: srl a6, a0, a2 +; RV32I-NEXT: sll a3, a1, a3 +; RV32I-NEXT: or a6, a6, a3 ; RV32I-NEXT: mv a3, a0 ; RV32I-NEXT: beqz a5, .LBB3_4 ; RV32I-NEXT: .LBB3_3: @@ -316,11 +316,11 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind { ; RV32I-NEXT: andi a5, a6, 63 ; RV32I-NEXT: bgeu a5, a4, .LBB3_8 ; RV32I-NEXT: .LBB3_6: +; RV32I-NEXT: neg a4, a5 +; RV32I-NEXT: srl a7, a0, a4 +; RV32I-NEXT: sll t0, a1, a6 ; RV32I-NEXT: sll a4, a0, a6 -; RV32I-NEXT: neg a7, a5 -; RV32I-NEXT: srl a0, a0, a7 -; RV32I-NEXT: sll a6, a1, a6 -; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: or a0, a7, t0 ; RV32I-NEXT: bnez a5, .LBB3_9 ; RV32I-NEXT: j 
.LBB3_10 ; RV32I-NEXT: .LBB3_7: @@ -357,10 +357,10 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind { ; RV32ZBB-NEXT: bnez a5, .LBB3_3 ; RV32ZBB-NEXT: j .LBB3_4 ; RV32ZBB-NEXT: .LBB3_2: -; RV32ZBB-NEXT: srl a3, a0, a2 -; RV32ZBB-NEXT: neg a6, a5 -; RV32ZBB-NEXT: sll a6, a1, a6 -; RV32ZBB-NEXT: or a6, a3, a6 +; RV32ZBB-NEXT: neg a3, a5 +; RV32ZBB-NEXT: srl a6, a0, a2 +; RV32ZBB-NEXT: sll a3, a1, a3 +; RV32ZBB-NEXT: or a6, a6, a3 ; RV32ZBB-NEXT: mv a3, a0 ; RV32ZBB-NEXT: beqz a5, .LBB3_4 ; RV32ZBB-NEXT: .LBB3_3: @@ -373,11 +373,11 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind { ; RV32ZBB-NEXT: andi a5, a6, 63 ; RV32ZBB-NEXT: bgeu a5, a4, .LBB3_8 ; RV32ZBB-NEXT: .LBB3_6: +; RV32ZBB-NEXT: neg a4, a5 +; RV32ZBB-NEXT: srl a7, a0, a4 +; RV32ZBB-NEXT: sll t0, a1, a6 ; RV32ZBB-NEXT: sll a4, a0, a6 -; RV32ZBB-NEXT: neg a7, a5 -; RV32ZBB-NEXT: srl a0, a0, a7 -; RV32ZBB-NEXT: sll a6, a1, a6 -; RV32ZBB-NEXT: or a0, a0, a6 +; RV32ZBB-NEXT: or a0, a7, t0 ; RV32ZBB-NEXT: bnez a5, .LBB3_9 ; RV32ZBB-NEXT: j .LBB3_10 ; RV32ZBB-NEXT: .LBB3_7: @@ -411,10 +411,10 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind { ; RV32XTHEADBB-NEXT: bnez a5, .LBB3_3 ; RV32XTHEADBB-NEXT: j .LBB3_4 ; RV32XTHEADBB-NEXT: .LBB3_2: -; RV32XTHEADBB-NEXT: srl a3, a0, a2 -; RV32XTHEADBB-NEXT: neg a6, a5 -; RV32XTHEADBB-NEXT: sll a6, a1, a6 -; RV32XTHEADBB-NEXT: or a6, a3, a6 +; RV32XTHEADBB-NEXT: neg a3, a5 +; RV32XTHEADBB-NEXT: srl a6, a0, a2 +; RV32XTHEADBB-NEXT: sll a3, a1, a3 +; RV32XTHEADBB-NEXT: or a6, a6, a3 ; RV32XTHEADBB-NEXT: mv a3, a0 ; RV32XTHEADBB-NEXT: beqz a5, .LBB3_4 ; RV32XTHEADBB-NEXT: .LBB3_3: @@ -427,11 +427,11 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind { ; RV32XTHEADBB-NEXT: andi a5, a6, 63 ; RV32XTHEADBB-NEXT: bgeu a5, a4, .LBB3_8 ; RV32XTHEADBB-NEXT: .LBB3_6: +; RV32XTHEADBB-NEXT: neg a4, a5 +; RV32XTHEADBB-NEXT: srl a7, a0, a4 +; RV32XTHEADBB-NEXT: sll t0, a1, a6 ; RV32XTHEADBB-NEXT: sll a4, a0, a6 -; RV32XTHEADBB-NEXT: neg a7, a5 -; RV32XTHEADBB-NEXT: srl a0, a0, a7 -; 
RV32XTHEADBB-NEXT: sll a6, a1, a6 -; RV32XTHEADBB-NEXT: or a0, a0, a6 +; RV32XTHEADBB-NEXT: or a0, a7, t0 ; RV32XTHEADBB-NEXT: bnez a5, .LBB3_9 ; RV32XTHEADBB-NEXT: j .LBB3_10 ; RV32XTHEADBB-NEXT: .LBB3_7: @@ -522,50 +522,50 @@ define i32 @rotl_32_mask(i32 %x, i32 %y) nounwind { define i32 @rotl_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind { ; RV32I-LABEL: rotl_32_mask_and_63_and_31: ; RV32I: # %bb.0: -; RV32I-NEXT: sll a2, a0, a1 -; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: srl a0, a0, a1 -; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: neg a2, a1 +; RV32I-NEXT: sll a1, a0, a1 +; RV32I-NEXT: srl a0, a0, a2 +; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: rotl_32_mask_and_63_and_31: ; RV64I: # %bb.0: -; RV64I-NEXT: sllw a2, a0, a1 -; RV64I-NEXT: neg a1, a1 -; RV64I-NEXT: srlw a0, a0, a1 -; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: neg a2, a1 +; RV64I-NEXT: sllw a1, a0, a1 +; RV64I-NEXT: srlw a0, a0, a2 +; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: rotl_32_mask_and_63_and_31: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: sll a2, a0, a1 -; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: srl a0, a0, a1 -; RV32ZBB-NEXT: or a0, a2, a0 +; RV32ZBB-NEXT: neg a2, a1 +; RV32ZBB-NEXT: sll a1, a0, a1 +; RV32ZBB-NEXT: srl a0, a0, a2 +; RV32ZBB-NEXT: or a0, a1, a0 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: rotl_32_mask_and_63_and_31: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: sllw a2, a0, a1 -; RV64ZBB-NEXT: neg a1, a1 -; RV64ZBB-NEXT: srlw a0, a0, a1 -; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: neg a2, a1 +; RV64ZBB-NEXT: sllw a1, a0, a1 +; RV64ZBB-NEXT: srlw a0, a0, a2 +; RV64ZBB-NEXT: or a0, a1, a0 ; RV64ZBB-NEXT: ret ; ; RV32XTHEADBB-LABEL: rotl_32_mask_and_63_and_31: ; RV32XTHEADBB: # %bb.0: -; RV32XTHEADBB-NEXT: sll a2, a0, a1 -; RV32XTHEADBB-NEXT: neg a1, a1 -; RV32XTHEADBB-NEXT: srl a0, a0, a1 -; RV32XTHEADBB-NEXT: or a0, a2, a0 +; RV32XTHEADBB-NEXT: neg a2, a1 +; RV32XTHEADBB-NEXT: sll a1, a0, a1 +; RV32XTHEADBB-NEXT: srl a0, a0, a2 +; RV32XTHEADBB-NEXT: or a0, a1, 
a0 ; RV32XTHEADBB-NEXT: ret ; ; RV64XTHEADBB-LABEL: rotl_32_mask_and_63_and_31: ; RV64XTHEADBB: # %bb.0: -; RV64XTHEADBB-NEXT: sllw a2, a0, a1 -; RV64XTHEADBB-NEXT: neg a1, a1 -; RV64XTHEADBB-NEXT: srlw a0, a0, a1 -; RV64XTHEADBB-NEXT: or a0, a2, a0 +; RV64XTHEADBB-NEXT: neg a2, a1 +; RV64XTHEADBB-NEXT: sllw a1, a0, a1 +; RV64XTHEADBB-NEXT: srlw a0, a0, a2 +; RV64XTHEADBB-NEXT: or a0, a1, a0 ; RV64XTHEADBB-NEXT: ret %a = and i32 %y, 63 %b = shl i32 %x, %a @@ -680,50 +680,50 @@ define i32 @rotr_32_mask(i32 %x, i32 %y) nounwind { define i32 @rotr_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind { ; RV32I-LABEL: rotr_32_mask_and_63_and_31: ; RV32I: # %bb.0: -; RV32I-NEXT: srl a2, a0, a1 -; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sll a0, a0, a1 -; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: neg a2, a1 +; RV32I-NEXT: srl a1, a0, a1 +; RV32I-NEXT: sll a0, a0, a2 +; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: rotr_32_mask_and_63_and_31: ; RV64I: # %bb.0: -; RV64I-NEXT: srlw a2, a0, a1 -; RV64I-NEXT: neg a1, a1 -; RV64I-NEXT: sllw a0, a0, a1 -; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: neg a2, a1 +; RV64I-NEXT: srlw a1, a0, a1 +; RV64I-NEXT: sllw a0, a0, a2 +; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: rotr_32_mask_and_63_and_31: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: srl a2, a0, a1 -; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: sll a0, a0, a1 -; RV32ZBB-NEXT: or a0, a2, a0 +; RV32ZBB-NEXT: neg a2, a1 +; RV32ZBB-NEXT: srl a1, a0, a1 +; RV32ZBB-NEXT: sll a0, a0, a2 +; RV32ZBB-NEXT: or a0, a1, a0 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: rotr_32_mask_and_63_and_31: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: srlw a2, a0, a1 -; RV64ZBB-NEXT: neg a1, a1 -; RV64ZBB-NEXT: sllw a0, a0, a1 -; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: neg a2, a1 +; RV64ZBB-NEXT: srlw a1, a0, a1 +; RV64ZBB-NEXT: sllw a0, a0, a2 +; RV64ZBB-NEXT: or a0, a1, a0 ; RV64ZBB-NEXT: ret ; ; RV32XTHEADBB-LABEL: rotr_32_mask_and_63_and_31: ; RV32XTHEADBB: # %bb.0: -; RV32XTHEADBB-NEXT: srl 
a2, a0, a1 -; RV32XTHEADBB-NEXT: neg a1, a1 -; RV32XTHEADBB-NEXT: sll a0, a0, a1 -; RV32XTHEADBB-NEXT: or a0, a2, a0 +; RV32XTHEADBB-NEXT: neg a2, a1 +; RV32XTHEADBB-NEXT: srl a1, a0, a1 +; RV32XTHEADBB-NEXT: sll a0, a0, a2 +; RV32XTHEADBB-NEXT: or a0, a1, a0 ; RV32XTHEADBB-NEXT: ret ; ; RV64XTHEADBB-LABEL: rotr_32_mask_and_63_and_31: ; RV64XTHEADBB: # %bb.0: -; RV64XTHEADBB-NEXT: srlw a2, a0, a1 -; RV64XTHEADBB-NEXT: neg a1, a1 -; RV64XTHEADBB-NEXT: sllw a0, a0, a1 -; RV64XTHEADBB-NEXT: or a0, a2, a0 +; RV64XTHEADBB-NEXT: neg a2, a1 +; RV64XTHEADBB-NEXT: srlw a1, a0, a1 +; RV64XTHEADBB-NEXT: sllw a0, a0, a2 +; RV64XTHEADBB-NEXT: or a0, a1, a0 ; RV64XTHEADBB-NEXT: ret %a = and i32 %y, 63 %b = lshr i32 %x, %a @@ -782,18 +782,18 @@ define i32 @rotr_32_mask_or_64_or_32(i32 %x, i32 %y) nounwind { define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: rotl_64_mask: ; RV32I: # %bb.0: -; RV32I-NEXT: li a5, 32 +; RV32I-NEXT: li a3, 32 ; RV32I-NEXT: neg a4, a2 -; RV32I-NEXT: bltu a2, a5, .LBB10_2 +; RV32I-NEXT: bltu a2, a3, .LBB10_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: sll t0, a0, a2 ; RV32I-NEXT: j .LBB10_3 ; RV32I-NEXT: .LBB10_2: -; RV32I-NEXT: sll a3, a0, a2 -; RV32I-NEXT: neg a6, a2 -; RV32I-NEXT: srl a6, a0, a6 +; RV32I-NEXT: neg a5, a2 +; RV32I-NEXT: srl a6, a0, a5 ; RV32I-NEXT: sll a7, a1, a2 +; RV32I-NEXT: sll a5, a0, a2 ; RV32I-NEXT: or t0, a6, a7 ; RV32I-NEXT: .LBB10_3: ; RV32I-NEXT: andi a7, a4, 63 @@ -802,28 +802,28 @@ define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind { ; RV32I-NEXT: # %bb.4: ; RV32I-NEXT: mv a6, t0 ; RV32I-NEXT: .LBB10_5: -; RV32I-NEXT: bltu a7, a5, .LBB10_7 +; RV32I-NEXT: bltu a7, a3, .LBB10_7 ; RV32I-NEXT: # %bb.6: ; RV32I-NEXT: srl a2, a1, a7 ; RV32I-NEXT: bnez a7, .LBB10_8 ; RV32I-NEXT: j .LBB10_9 ; RV32I-NEXT: .LBB10_7: -; RV32I-NEXT: srl a2, a0, a4 -; RV32I-NEXT: neg t0, a7 -; RV32I-NEXT: sll t0, a1, t0 -; RV32I-NEXT: or a2, a2, t0 +; RV32I-NEXT: neg a2, a7 +; RV32I-NEXT: srl t0, 
a0, a4 +; RV32I-NEXT: sll a2, a1, a2 +; RV32I-NEXT: or a2, t0, a2 ; RV32I-NEXT: beqz a7, .LBB10_9 ; RV32I-NEXT: .LBB10_8: ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: .LBB10_9: -; RV32I-NEXT: bltu a7, a5, .LBB10_11 +; RV32I-NEXT: bltu a7, a3, .LBB10_11 ; RV32I-NEXT: # %bb.10: ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: j .LBB10_12 ; RV32I-NEXT: .LBB10_11: ; RV32I-NEXT: srl a1, a1, a4 ; RV32I-NEXT: .LBB10_12: -; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: or a1, a6, a1 ; RV32I-NEXT: ret ; @@ -837,18 +837,18 @@ define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind { ; ; RV32ZBB-LABEL: rotl_64_mask: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: li a5, 32 +; RV32ZBB-NEXT: li a3, 32 ; RV32ZBB-NEXT: neg a4, a2 -; RV32ZBB-NEXT: bltu a2, a5, .LBB10_2 +; RV32ZBB-NEXT: bltu a2, a3, .LBB10_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: li a3, 0 +; RV32ZBB-NEXT: li a5, 0 ; RV32ZBB-NEXT: sll t0, a0, a2 ; RV32ZBB-NEXT: j .LBB10_3 ; RV32ZBB-NEXT: .LBB10_2: -; RV32ZBB-NEXT: sll a3, a0, a2 -; RV32ZBB-NEXT: neg a6, a2 -; RV32ZBB-NEXT: srl a6, a0, a6 +; RV32ZBB-NEXT: neg a5, a2 +; RV32ZBB-NEXT: srl a6, a0, a5 ; RV32ZBB-NEXT: sll a7, a1, a2 +; RV32ZBB-NEXT: sll a5, a0, a2 ; RV32ZBB-NEXT: or t0, a6, a7 ; RV32ZBB-NEXT: .LBB10_3: ; RV32ZBB-NEXT: andi a7, a4, 63 @@ -857,28 +857,28 @@ define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind { ; RV32ZBB-NEXT: # %bb.4: ; RV32ZBB-NEXT: mv a6, t0 ; RV32ZBB-NEXT: .LBB10_5: -; RV32ZBB-NEXT: bltu a7, a5, .LBB10_7 +; RV32ZBB-NEXT: bltu a7, a3, .LBB10_7 ; RV32ZBB-NEXT: # %bb.6: ; RV32ZBB-NEXT: srl a2, a1, a7 ; RV32ZBB-NEXT: bnez a7, .LBB10_8 ; RV32ZBB-NEXT: j .LBB10_9 ; RV32ZBB-NEXT: .LBB10_7: -; RV32ZBB-NEXT: srl a2, a0, a4 -; RV32ZBB-NEXT: neg t0, a7 -; RV32ZBB-NEXT: sll t0, a1, t0 -; RV32ZBB-NEXT: or a2, a2, t0 +; RV32ZBB-NEXT: neg a2, a7 +; RV32ZBB-NEXT: srl t0, a0, a4 +; RV32ZBB-NEXT: sll a2, a1, a2 +; RV32ZBB-NEXT: or a2, t0, a2 ; RV32ZBB-NEXT: beqz a7, .LBB10_9 ; RV32ZBB-NEXT: .LBB10_8: ; RV32ZBB-NEXT: mv a0, a2 ; RV32ZBB-NEXT: .LBB10_9: -; 
RV32ZBB-NEXT: bltu a7, a5, .LBB10_11 +; RV32ZBB-NEXT: bltu a7, a3, .LBB10_11 ; RV32ZBB-NEXT: # %bb.10: ; RV32ZBB-NEXT: li a1, 0 ; RV32ZBB-NEXT: j .LBB10_12 ; RV32ZBB-NEXT: .LBB10_11: ; RV32ZBB-NEXT: srl a1, a1, a4 ; RV32ZBB-NEXT: .LBB10_12: -; RV32ZBB-NEXT: or a0, a3, a0 +; RV32ZBB-NEXT: or a0, a5, a0 ; RV32ZBB-NEXT: or a1, a6, a1 ; RV32ZBB-NEXT: ret ; @@ -892,18 +892,18 @@ define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind { ; ; RV32XTHEADBB-LABEL: rotl_64_mask: ; RV32XTHEADBB: # %bb.0: -; RV32XTHEADBB-NEXT: li a5, 32 +; RV32XTHEADBB-NEXT: li a3, 32 ; RV32XTHEADBB-NEXT: neg a4, a2 -; RV32XTHEADBB-NEXT: bltu a2, a5, .LBB10_2 +; RV32XTHEADBB-NEXT: bltu a2, a3, .LBB10_2 ; RV32XTHEADBB-NEXT: # %bb.1: -; RV32XTHEADBB-NEXT: li a3, 0 +; RV32XTHEADBB-NEXT: li a5, 0 ; RV32XTHEADBB-NEXT: sll t0, a0, a2 ; RV32XTHEADBB-NEXT: j .LBB10_3 ; RV32XTHEADBB-NEXT: .LBB10_2: -; RV32XTHEADBB-NEXT: sll a3, a0, a2 -; RV32XTHEADBB-NEXT: neg a6, a2 -; RV32XTHEADBB-NEXT: srl a6, a0, a6 +; RV32XTHEADBB-NEXT: neg a5, a2 +; RV32XTHEADBB-NEXT: srl a6, a0, a5 ; RV32XTHEADBB-NEXT: sll a7, a1, a2 +; RV32XTHEADBB-NEXT: sll a5, a0, a2 ; RV32XTHEADBB-NEXT: or t0, a6, a7 ; RV32XTHEADBB-NEXT: .LBB10_3: ; RV32XTHEADBB-NEXT: andi a7, a4, 63 @@ -912,28 +912,28 @@ define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind { ; RV32XTHEADBB-NEXT: # %bb.4: ; RV32XTHEADBB-NEXT: mv a6, t0 ; RV32XTHEADBB-NEXT: .LBB10_5: -; RV32XTHEADBB-NEXT: bltu a7, a5, .LBB10_7 +; RV32XTHEADBB-NEXT: bltu a7, a3, .LBB10_7 ; RV32XTHEADBB-NEXT: # %bb.6: ; RV32XTHEADBB-NEXT: srl a2, a1, a7 ; RV32XTHEADBB-NEXT: bnez a7, .LBB10_8 ; RV32XTHEADBB-NEXT: j .LBB10_9 ; RV32XTHEADBB-NEXT: .LBB10_7: -; RV32XTHEADBB-NEXT: srl a2, a0, a4 -; RV32XTHEADBB-NEXT: neg t0, a7 -; RV32XTHEADBB-NEXT: sll t0, a1, t0 -; RV32XTHEADBB-NEXT: or a2, a2, t0 +; RV32XTHEADBB-NEXT: neg a2, a7 +; RV32XTHEADBB-NEXT: srl t0, a0, a4 +; RV32XTHEADBB-NEXT: sll a2, a1, a2 +; RV32XTHEADBB-NEXT: or a2, t0, a2 ; RV32XTHEADBB-NEXT: beqz a7, .LBB10_9 ; RV32XTHEADBB-NEXT: .LBB10_8: ; 
RV32XTHEADBB-NEXT: mv a0, a2 ; RV32XTHEADBB-NEXT: .LBB10_9: -; RV32XTHEADBB-NEXT: bltu a7, a5, .LBB10_11 +; RV32XTHEADBB-NEXT: bltu a7, a3, .LBB10_11 ; RV32XTHEADBB-NEXT: # %bb.10: ; RV32XTHEADBB-NEXT: li a1, 0 ; RV32XTHEADBB-NEXT: j .LBB10_12 ; RV32XTHEADBB-NEXT: .LBB10_11: ; RV32XTHEADBB-NEXT: srl a1, a1, a4 ; RV32XTHEADBB-NEXT: .LBB10_12: -; RV32XTHEADBB-NEXT: or a0, a3, a0 +; RV32XTHEADBB-NEXT: or a0, a5, a0 ; RV32XTHEADBB-NEXT: or a1, a6, a1 ; RV32XTHEADBB-NEXT: ret ; @@ -956,19 +956,19 @@ define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: rotl_64_mask_and_127_and_63: ; RV32I: # %bb.0: ; RV32I-NEXT: andi a6, a2, 127 -; RV32I-NEXT: li a4, 32 -; RV32I-NEXT: bltu a6, a4, .LBB11_2 +; RV32I-NEXT: li a3, 32 +; RV32I-NEXT: bltu a6, a3, .LBB11_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: sll a7, a0, a6 ; RV32I-NEXT: mv a5, a1 ; RV32I-NEXT: bnez a6, .LBB11_3 ; RV32I-NEXT: j .LBB11_4 ; RV32I-NEXT: .LBB11_2: -; RV32I-NEXT: sll a3, a0, a2 -; RV32I-NEXT: neg a5, a6 -; RV32I-NEXT: srl a5, a0, a5 +; RV32I-NEXT: neg a4, a6 +; RV32I-NEXT: srl a5, a0, a4 ; RV32I-NEXT: sll a7, a1, a2 +; RV32I-NEXT: sll a4, a0, a2 ; RV32I-NEXT: or a7, a5, a7 ; RV32I-NEXT: mv a5, a1 ; RV32I-NEXT: beqz a6, .LBB11_4 @@ -977,55 +977,55 @@ define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { ; RV32I-NEXT: .LBB11_4: ; RV32I-NEXT: neg a2, a2 ; RV32I-NEXT: andi a6, a2, 63 -; RV32I-NEXT: bltu a6, a4, .LBB11_6 +; RV32I-NEXT: bltu a6, a3, .LBB11_6 ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: srl a7, a1, a6 ; RV32I-NEXT: bnez a6, .LBB11_7 ; RV32I-NEXT: j .LBB11_8 ; RV32I-NEXT: .LBB11_6: -; RV32I-NEXT: srl a7, a0, a2 -; RV32I-NEXT: neg t0, a6 -; RV32I-NEXT: sll t0, a1, t0 -; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: neg a7, a6 +; RV32I-NEXT: srl t0, a0, a2 +; RV32I-NEXT: sll a7, a1, a7 +; RV32I-NEXT: or a7, t0, a7 ; RV32I-NEXT: beqz a6, .LBB11_8 ; RV32I-NEXT: .LBB11_7: ; RV32I-NEXT: mv a0, a7 ; RV32I-NEXT: .LBB11_8: -; RV32I-NEXT: 
bltu a6, a4, .LBB11_10 +; RV32I-NEXT: bltu a6, a3, .LBB11_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: j .LBB11_11 ; RV32I-NEXT: .LBB11_10: ; RV32I-NEXT: srl a1, a1, a2 ; RV32I-NEXT: .LBB11_11: -; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: or a0, a4, a0 ; RV32I-NEXT: or a1, a5, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: rotl_64_mask_and_127_and_63: ; RV64I: # %bb.0: -; RV64I-NEXT: sll a2, a0, a1 -; RV64I-NEXT: neg a1, a1 -; RV64I-NEXT: srl a0, a0, a1 -; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: neg a2, a1 +; RV64I-NEXT: sll a1, a0, a1 +; RV64I-NEXT: srl a0, a0, a2 +; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: rotl_64_mask_and_127_and_63: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: andi a6, a2, 127 -; RV32ZBB-NEXT: li a4, 32 -; RV32ZBB-NEXT: bltu a6, a4, .LBB11_2 +; RV32ZBB-NEXT: li a3, 32 +; RV32ZBB-NEXT: bltu a6, a3, .LBB11_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: li a3, 0 +; RV32ZBB-NEXT: li a4, 0 ; RV32ZBB-NEXT: sll a7, a0, a6 ; RV32ZBB-NEXT: mv a5, a1 ; RV32ZBB-NEXT: bnez a6, .LBB11_3 ; RV32ZBB-NEXT: j .LBB11_4 ; RV32ZBB-NEXT: .LBB11_2: -; RV32ZBB-NEXT: sll a3, a0, a2 -; RV32ZBB-NEXT: neg a5, a6 -; RV32ZBB-NEXT: srl a5, a0, a5 +; RV32ZBB-NEXT: neg a4, a6 +; RV32ZBB-NEXT: srl a5, a0, a4 ; RV32ZBB-NEXT: sll a7, a1, a2 +; RV32ZBB-NEXT: sll a4, a0, a2 ; RV32ZBB-NEXT: or a7, a5, a7 ; RV32ZBB-NEXT: mv a5, a1 ; RV32ZBB-NEXT: beqz a6, .LBB11_4 @@ -1034,55 +1034,55 @@ define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { ; RV32ZBB-NEXT: .LBB11_4: ; RV32ZBB-NEXT: neg a2, a2 ; RV32ZBB-NEXT: andi a6, a2, 63 -; RV32ZBB-NEXT: bltu a6, a4, .LBB11_6 +; RV32ZBB-NEXT: bltu a6, a3, .LBB11_6 ; RV32ZBB-NEXT: # %bb.5: ; RV32ZBB-NEXT: srl a7, a1, a6 ; RV32ZBB-NEXT: bnez a6, .LBB11_7 ; RV32ZBB-NEXT: j .LBB11_8 ; RV32ZBB-NEXT: .LBB11_6: -; RV32ZBB-NEXT: srl a7, a0, a2 -; RV32ZBB-NEXT: neg t0, a6 -; RV32ZBB-NEXT: sll t0, a1, t0 -; RV32ZBB-NEXT: or a7, a7, t0 +; RV32ZBB-NEXT: neg a7, a6 +; RV32ZBB-NEXT: srl t0, a0, a2 +; RV32ZBB-NEXT: sll a7, a1, a7 
+; RV32ZBB-NEXT: or a7, t0, a7 ; RV32ZBB-NEXT: beqz a6, .LBB11_8 ; RV32ZBB-NEXT: .LBB11_7: ; RV32ZBB-NEXT: mv a0, a7 ; RV32ZBB-NEXT: .LBB11_8: -; RV32ZBB-NEXT: bltu a6, a4, .LBB11_10 +; RV32ZBB-NEXT: bltu a6, a3, .LBB11_10 ; RV32ZBB-NEXT: # %bb.9: ; RV32ZBB-NEXT: li a1, 0 ; RV32ZBB-NEXT: j .LBB11_11 ; RV32ZBB-NEXT: .LBB11_10: ; RV32ZBB-NEXT: srl a1, a1, a2 ; RV32ZBB-NEXT: .LBB11_11: -; RV32ZBB-NEXT: or a0, a3, a0 +; RV32ZBB-NEXT: or a0, a4, a0 ; RV32ZBB-NEXT: or a1, a5, a1 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: rotl_64_mask_and_127_and_63: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: sll a2, a0, a1 -; RV64ZBB-NEXT: neg a1, a1 -; RV64ZBB-NEXT: srl a0, a0, a1 -; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: neg a2, a1 +; RV64ZBB-NEXT: sll a1, a0, a1 +; RV64ZBB-NEXT: srl a0, a0, a2 +; RV64ZBB-NEXT: or a0, a1, a0 ; RV64ZBB-NEXT: ret ; ; RV32XTHEADBB-LABEL: rotl_64_mask_and_127_and_63: ; RV32XTHEADBB: # %bb.0: ; RV32XTHEADBB-NEXT: andi a6, a2, 127 -; RV32XTHEADBB-NEXT: li a4, 32 -; RV32XTHEADBB-NEXT: bltu a6, a4, .LBB11_2 +; RV32XTHEADBB-NEXT: li a3, 32 +; RV32XTHEADBB-NEXT: bltu a6, a3, .LBB11_2 ; RV32XTHEADBB-NEXT: # %bb.1: -; RV32XTHEADBB-NEXT: li a3, 0 +; RV32XTHEADBB-NEXT: li a4, 0 ; RV32XTHEADBB-NEXT: sll a7, a0, a6 ; RV32XTHEADBB-NEXT: mv a5, a1 ; RV32XTHEADBB-NEXT: bnez a6, .LBB11_3 ; RV32XTHEADBB-NEXT: j .LBB11_4 ; RV32XTHEADBB-NEXT: .LBB11_2: -; RV32XTHEADBB-NEXT: sll a3, a0, a2 -; RV32XTHEADBB-NEXT: neg a5, a6 -; RV32XTHEADBB-NEXT: srl a5, a0, a5 +; RV32XTHEADBB-NEXT: neg a4, a6 +; RV32XTHEADBB-NEXT: srl a5, a0, a4 ; RV32XTHEADBB-NEXT: sll a7, a1, a2 +; RV32XTHEADBB-NEXT: sll a4, a0, a2 ; RV32XTHEADBB-NEXT: or a7, a5, a7 ; RV32XTHEADBB-NEXT: mv a5, a1 ; RV32XTHEADBB-NEXT: beqz a6, .LBB11_4 @@ -1091,37 +1091,37 @@ define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { ; RV32XTHEADBB-NEXT: .LBB11_4: ; RV32XTHEADBB-NEXT: neg a2, a2 ; RV32XTHEADBB-NEXT: andi a6, a2, 63 -; RV32XTHEADBB-NEXT: bltu a6, a4, .LBB11_6 +; RV32XTHEADBB-NEXT: bltu a6, a3, .LBB11_6 ; 
RV32XTHEADBB-NEXT: # %bb.5: ; RV32XTHEADBB-NEXT: srl a7, a1, a6 ; RV32XTHEADBB-NEXT: bnez a6, .LBB11_7 ; RV32XTHEADBB-NEXT: j .LBB11_8 ; RV32XTHEADBB-NEXT: .LBB11_6: -; RV32XTHEADBB-NEXT: srl a7, a0, a2 -; RV32XTHEADBB-NEXT: neg t0, a6 -; RV32XTHEADBB-NEXT: sll t0, a1, t0 -; RV32XTHEADBB-NEXT: or a7, a7, t0 +; RV32XTHEADBB-NEXT: neg a7, a6 +; RV32XTHEADBB-NEXT: srl t0, a0, a2 +; RV32XTHEADBB-NEXT: sll a7, a1, a7 +; RV32XTHEADBB-NEXT: or a7, t0, a7 ; RV32XTHEADBB-NEXT: beqz a6, .LBB11_8 ; RV32XTHEADBB-NEXT: .LBB11_7: ; RV32XTHEADBB-NEXT: mv a0, a7 ; RV32XTHEADBB-NEXT: .LBB11_8: -; RV32XTHEADBB-NEXT: bltu a6, a4, .LBB11_10 +; RV32XTHEADBB-NEXT: bltu a6, a3, .LBB11_10 ; RV32XTHEADBB-NEXT: # %bb.9: ; RV32XTHEADBB-NEXT: li a1, 0 ; RV32XTHEADBB-NEXT: j .LBB11_11 ; RV32XTHEADBB-NEXT: .LBB11_10: ; RV32XTHEADBB-NEXT: srl a1, a1, a2 ; RV32XTHEADBB-NEXT: .LBB11_11: -; RV32XTHEADBB-NEXT: or a0, a3, a0 +; RV32XTHEADBB-NEXT: or a0, a4, a0 ; RV32XTHEADBB-NEXT: or a1, a5, a1 ; RV32XTHEADBB-NEXT: ret ; ; RV64XTHEADBB-LABEL: rotl_64_mask_and_127_and_63: ; RV64XTHEADBB: # %bb.0: -; RV64XTHEADBB-NEXT: sll a2, a0, a1 -; RV64XTHEADBB-NEXT: neg a1, a1 -; RV64XTHEADBB-NEXT: srl a0, a0, a1 -; RV64XTHEADBB-NEXT: or a0, a2, a0 +; RV64XTHEADBB-NEXT: neg a2, a1 +; RV64XTHEADBB-NEXT: sll a1, a0, a1 +; RV64XTHEADBB-NEXT: srl a0, a0, a2 +; RV64XTHEADBB-NEXT: or a0, a1, a0 ; RV64XTHEADBB-NEXT: ret %a = and i64 %y, 127 %b = shl i64 %x, %a @@ -1145,11 +1145,11 @@ define i64 @rotl_64_mask_or_128_or_64(i64 %x, i64 %y) nounwind { ; RV32I-NEXT: bnez a2, .LBB12_3 ; RV32I-NEXT: j .LBB12_4 ; RV32I-NEXT: .LBB12_2: +; RV32I-NEXT: neg a0, a2 +; RV32I-NEXT: srl a4, a3, a0 +; RV32I-NEXT: sll a5, a1, a2 ; RV32I-NEXT: sll a0, a3, a2 -; RV32I-NEXT: neg a4, a2 -; RV32I-NEXT: srl a3, a3, a4 -; RV32I-NEXT: sll a4, a1, a2 -; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: or a3, a4, a5 ; RV32I-NEXT: beqz a2, .LBB12_4 ; RV32I-NEXT: .LBB12_3: ; RV32I-NEXT: mv a1, a3 @@ -1174,11 +1174,11 @@ define i64 
@rotl_64_mask_or_128_or_64(i64 %x, i64 %y) nounwind { ; RV32ZBB-NEXT: bnez a2, .LBB12_3 ; RV32ZBB-NEXT: j .LBB12_4 ; RV32ZBB-NEXT: .LBB12_2: +; RV32ZBB-NEXT: neg a0, a2 +; RV32ZBB-NEXT: srl a4, a3, a0 +; RV32ZBB-NEXT: sll a5, a1, a2 ; RV32ZBB-NEXT: sll a0, a3, a2 -; RV32ZBB-NEXT: neg a4, a2 -; RV32ZBB-NEXT: srl a3, a3, a4 -; RV32ZBB-NEXT: sll a4, a1, a2 -; RV32ZBB-NEXT: or a3, a3, a4 +; RV32ZBB-NEXT: or a3, a4, a5 ; RV32ZBB-NEXT: beqz a2, .LBB12_4 ; RV32ZBB-NEXT: .LBB12_3: ; RV32ZBB-NEXT: mv a1, a3 @@ -1203,11 +1203,11 @@ define i64 @rotl_64_mask_or_128_or_64(i64 %x, i64 %y) nounwind { ; RV32XTHEADBB-NEXT: bnez a2, .LBB12_3 ; RV32XTHEADBB-NEXT: j .LBB12_4 ; RV32XTHEADBB-NEXT: .LBB12_2: +; RV32XTHEADBB-NEXT: neg a0, a2 +; RV32XTHEADBB-NEXT: srl a4, a3, a0 +; RV32XTHEADBB-NEXT: sll a5, a1, a2 ; RV32XTHEADBB-NEXT: sll a0, a3, a2 -; RV32XTHEADBB-NEXT: neg a4, a2 -; RV32XTHEADBB-NEXT: srl a3, a3, a4 -; RV32XTHEADBB-NEXT: sll a4, a1, a2 -; RV32XTHEADBB-NEXT: or a3, a3, a4 +; RV32XTHEADBB-NEXT: or a3, a4, a5 ; RV32XTHEADBB-NEXT: beqz a2, .LBB12_4 ; RV32XTHEADBB-NEXT: .LBB12_3: ; RV32XTHEADBB-NEXT: mv a1, a3 @@ -1237,10 +1237,10 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind { ; RV32I-NEXT: srl a5, a1, a2 ; RV32I-NEXT: j .LBB13_3 ; RV32I-NEXT: .LBB13_2: -; RV32I-NEXT: srl a3, a0, a2 -; RV32I-NEXT: neg a5, a2 -; RV32I-NEXT: sll a5, a1, a5 -; RV32I-NEXT: or a5, a3, a5 +; RV32I-NEXT: neg a3, a2 +; RV32I-NEXT: srl a5, a0, a2 +; RV32I-NEXT: sll a3, a1, a3 +; RV32I-NEXT: or a5, a5, a3 ; RV32I-NEXT: .LBB13_3: ; RV32I-NEXT: neg a6, a2 ; RV32I-NEXT: mv a3, a0 @@ -1254,11 +1254,11 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind { ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: bgeu a5, a4, .LBB13_9 ; RV32I-NEXT: .LBB13_7: +; RV32I-NEXT: neg a4, a5 +; RV32I-NEXT: srl a7, a0, a4 +; RV32I-NEXT: sll t0, a1, a6 ; RV32I-NEXT: sll a4, a0, a6 -; RV32I-NEXT: neg a7, a5 -; RV32I-NEXT: srl a0, a0, a7 -; RV32I-NEXT: sll a6, a1, a6 -; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: or a0, a7, t0 ; 
RV32I-NEXT: bnez a5, .LBB13_10 ; RV32I-NEXT: j .LBB13_11 ; RV32I-NEXT: .LBB13_8: @@ -1291,10 +1291,10 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind { ; RV32ZBB-NEXT: srl a5, a1, a2 ; RV32ZBB-NEXT: j .LBB13_3 ; RV32ZBB-NEXT: .LBB13_2: -; RV32ZBB-NEXT: srl a3, a0, a2 -; RV32ZBB-NEXT: neg a5, a2 -; RV32ZBB-NEXT: sll a5, a1, a5 -; RV32ZBB-NEXT: or a5, a3, a5 +; RV32ZBB-NEXT: neg a3, a2 +; RV32ZBB-NEXT: srl a5, a0, a2 +; RV32ZBB-NEXT: sll a3, a1, a3 +; RV32ZBB-NEXT: or a5, a5, a3 ; RV32ZBB-NEXT: .LBB13_3: ; RV32ZBB-NEXT: neg a6, a2 ; RV32ZBB-NEXT: mv a3, a0 @@ -1308,11 +1308,11 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind { ; RV32ZBB-NEXT: li a2, 0 ; RV32ZBB-NEXT: bgeu a5, a4, .LBB13_9 ; RV32ZBB-NEXT: .LBB13_7: +; RV32ZBB-NEXT: neg a4, a5 +; RV32ZBB-NEXT: srl a7, a0, a4 +; RV32ZBB-NEXT: sll t0, a1, a6 ; RV32ZBB-NEXT: sll a4, a0, a6 -; RV32ZBB-NEXT: neg a7, a5 -; RV32ZBB-NEXT: srl a0, a0, a7 -; RV32ZBB-NEXT: sll a6, a1, a6 -; RV32ZBB-NEXT: or a0, a0, a6 +; RV32ZBB-NEXT: or a0, a7, t0 ; RV32ZBB-NEXT: bnez a5, .LBB13_10 ; RV32ZBB-NEXT: j .LBB13_11 ; RV32ZBB-NEXT: .LBB13_8: @@ -1345,10 +1345,10 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind { ; RV32XTHEADBB-NEXT: srl a5, a1, a2 ; RV32XTHEADBB-NEXT: j .LBB13_3 ; RV32XTHEADBB-NEXT: .LBB13_2: -; RV32XTHEADBB-NEXT: srl a3, a0, a2 -; RV32XTHEADBB-NEXT: neg a5, a2 -; RV32XTHEADBB-NEXT: sll a5, a1, a5 -; RV32XTHEADBB-NEXT: or a5, a3, a5 +; RV32XTHEADBB-NEXT: neg a3, a2 +; RV32XTHEADBB-NEXT: srl a5, a0, a2 +; RV32XTHEADBB-NEXT: sll a3, a1, a3 +; RV32XTHEADBB-NEXT: or a5, a5, a3 ; RV32XTHEADBB-NEXT: .LBB13_3: ; RV32XTHEADBB-NEXT: neg a6, a2 ; RV32XTHEADBB-NEXT: mv a3, a0 @@ -1362,11 +1362,11 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind { ; RV32XTHEADBB-NEXT: li a2, 0 ; RV32XTHEADBB-NEXT: bgeu a5, a4, .LBB13_9 ; RV32XTHEADBB-NEXT: .LBB13_7: +; RV32XTHEADBB-NEXT: neg a4, a5 +; RV32XTHEADBB-NEXT: srl a7, a0, a4 +; RV32XTHEADBB-NEXT: sll t0, a1, a6 ; RV32XTHEADBB-NEXT: sll a4, a0, a6 -; RV32XTHEADBB-NEXT: 
neg a7, a5 -; RV32XTHEADBB-NEXT: srl a0, a0, a7 -; RV32XTHEADBB-NEXT: sll a6, a1, a6 -; RV32XTHEADBB-NEXT: or a0, a0, a6 +; RV32XTHEADBB-NEXT: or a0, a7, t0 ; RV32XTHEADBB-NEXT: bnez a5, .LBB13_10 ; RV32XTHEADBB-NEXT: j .LBB13_11 ; RV32XTHEADBB-NEXT: .LBB13_8: @@ -1410,10 +1410,10 @@ define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { ; RV32I-NEXT: bnez a4, .LBB14_3 ; RV32I-NEXT: j .LBB14_4 ; RV32I-NEXT: .LBB14_2: -; RV32I-NEXT: srl a3, a0, a2 -; RV32I-NEXT: neg a6, a4 -; RV32I-NEXT: sll a6, a1, a6 -; RV32I-NEXT: or a6, a3, a6 +; RV32I-NEXT: neg a3, a4 +; RV32I-NEXT: srl a6, a0, a2 +; RV32I-NEXT: sll a3, a1, a3 +; RV32I-NEXT: or a6, a6, a3 ; RV32I-NEXT: mv a3, a0 ; RV32I-NEXT: beqz a4, .LBB14_4 ; RV32I-NEXT: .LBB14_3: @@ -1426,34 +1426,34 @@ define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { ; RV32I-NEXT: .LBB14_6: ; RV32I-NEXT: srl a4, a1, a2 ; RV32I-NEXT: .LBB14_7: -; RV32I-NEXT: neg a7, a2 -; RV32I-NEXT: andi a6, a7, 63 -; RV32I-NEXT: bltu a6, a5, .LBB14_9 +; RV32I-NEXT: neg a6, a2 +; RV32I-NEXT: andi a2, a6, 63 +; RV32I-NEXT: bltu a2, a5, .LBB14_9 ; RV32I-NEXT: # %bb.8: -; RV32I-NEXT: li a2, 0 -; RV32I-NEXT: sll a0, a0, a6 -; RV32I-NEXT: bnez a6, .LBB14_10 +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: sll a0, a0, a2 +; RV32I-NEXT: bnez a2, .LBB14_10 ; RV32I-NEXT: j .LBB14_11 ; RV32I-NEXT: .LBB14_9: -; RV32I-NEXT: sll a2, a0, a7 -; RV32I-NEXT: neg a5, a6 -; RV32I-NEXT: srl a0, a0, a5 -; RV32I-NEXT: sll a5, a1, a7 -; RV32I-NEXT: or a0, a0, a5 -; RV32I-NEXT: beqz a6, .LBB14_11 +; RV32I-NEXT: neg a5, a2 +; RV32I-NEXT: srl a7, a0, a5 +; RV32I-NEXT: sll t0, a1, a6 +; RV32I-NEXT: sll a5, a0, a6 +; RV32I-NEXT: or a0, a7, t0 +; RV32I-NEXT: beqz a2, .LBB14_11 ; RV32I-NEXT: .LBB14_10: ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: .LBB14_11: -; RV32I-NEXT: or a0, a3, a2 +; RV32I-NEXT: or a0, a3, a5 ; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: rotr_64_mask_and_127_and_63: ; RV64I: # %bb.0: -; RV64I-NEXT: srl a2, a0, a1 -; RV64I-NEXT: neg 
a1, a1 -; RV64I-NEXT: sll a0, a0, a1 -; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: neg a2, a1 +; RV64I-NEXT: srl a1, a0, a1 +; RV64I-NEXT: sll a0, a0, a2 +; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: rotr_64_mask_and_127_and_63: @@ -1467,10 +1467,10 @@ define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { ; RV32ZBB-NEXT: bnez a4, .LBB14_3 ; RV32ZBB-NEXT: j .LBB14_4 ; RV32ZBB-NEXT: .LBB14_2: -; RV32ZBB-NEXT: srl a3, a0, a2 -; RV32ZBB-NEXT: neg a6, a4 -; RV32ZBB-NEXT: sll a6, a1, a6 -; RV32ZBB-NEXT: or a6, a3, a6 +; RV32ZBB-NEXT: neg a3, a4 +; RV32ZBB-NEXT: srl a6, a0, a2 +; RV32ZBB-NEXT: sll a3, a1, a3 +; RV32ZBB-NEXT: or a6, a6, a3 ; RV32ZBB-NEXT: mv a3, a0 ; RV32ZBB-NEXT: beqz a4, .LBB14_4 ; RV32ZBB-NEXT: .LBB14_3: @@ -1483,34 +1483,34 @@ define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { ; RV32ZBB-NEXT: .LBB14_6: ; RV32ZBB-NEXT: srl a4, a1, a2 ; RV32ZBB-NEXT: .LBB14_7: -; RV32ZBB-NEXT: neg a7, a2 -; RV32ZBB-NEXT: andi a6, a7, 63 -; RV32ZBB-NEXT: bltu a6, a5, .LBB14_9 +; RV32ZBB-NEXT: neg a6, a2 +; RV32ZBB-NEXT: andi a2, a6, 63 +; RV32ZBB-NEXT: bltu a2, a5, .LBB14_9 ; RV32ZBB-NEXT: # %bb.8: -; RV32ZBB-NEXT: li a2, 0 -; RV32ZBB-NEXT: sll a0, a0, a6 -; RV32ZBB-NEXT: bnez a6, .LBB14_10 +; RV32ZBB-NEXT: li a5, 0 +; RV32ZBB-NEXT: sll a0, a0, a2 +; RV32ZBB-NEXT: bnez a2, .LBB14_10 ; RV32ZBB-NEXT: j .LBB14_11 ; RV32ZBB-NEXT: .LBB14_9: -; RV32ZBB-NEXT: sll a2, a0, a7 -; RV32ZBB-NEXT: neg a5, a6 -; RV32ZBB-NEXT: srl a0, a0, a5 -; RV32ZBB-NEXT: sll a5, a1, a7 -; RV32ZBB-NEXT: or a0, a0, a5 -; RV32ZBB-NEXT: beqz a6, .LBB14_11 +; RV32ZBB-NEXT: neg a5, a2 +; RV32ZBB-NEXT: srl a7, a0, a5 +; RV32ZBB-NEXT: sll t0, a1, a6 +; RV32ZBB-NEXT: sll a5, a0, a6 +; RV32ZBB-NEXT: or a0, a7, t0 +; RV32ZBB-NEXT: beqz a2, .LBB14_11 ; RV32ZBB-NEXT: .LBB14_10: ; RV32ZBB-NEXT: mv a1, a0 ; RV32ZBB-NEXT: .LBB14_11: -; RV32ZBB-NEXT: or a0, a3, a2 +; RV32ZBB-NEXT: or a0, a3, a5 ; RV32ZBB-NEXT: or a1, a4, a1 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: 
rotr_64_mask_and_127_and_63: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: srl a2, a0, a1 -; RV64ZBB-NEXT: neg a1, a1 -; RV64ZBB-NEXT: sll a0, a0, a1 -; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: neg a2, a1 +; RV64ZBB-NEXT: srl a1, a0, a1 +; RV64ZBB-NEXT: sll a0, a0, a2 +; RV64ZBB-NEXT: or a0, a1, a0 ; RV64ZBB-NEXT: ret ; ; RV32XTHEADBB-LABEL: rotr_64_mask_and_127_and_63: @@ -1524,10 +1524,10 @@ define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { ; RV32XTHEADBB-NEXT: bnez a4, .LBB14_3 ; RV32XTHEADBB-NEXT: j .LBB14_4 ; RV32XTHEADBB-NEXT: .LBB14_2: -; RV32XTHEADBB-NEXT: srl a3, a0, a2 -; RV32XTHEADBB-NEXT: neg a6, a4 -; RV32XTHEADBB-NEXT: sll a6, a1, a6 -; RV32XTHEADBB-NEXT: or a6, a3, a6 +; RV32XTHEADBB-NEXT: neg a3, a4 +; RV32XTHEADBB-NEXT: srl a6, a0, a2 +; RV32XTHEADBB-NEXT: sll a3, a1, a3 +; RV32XTHEADBB-NEXT: or a6, a6, a3 ; RV32XTHEADBB-NEXT: mv a3, a0 ; RV32XTHEADBB-NEXT: beqz a4, .LBB14_4 ; RV32XTHEADBB-NEXT: .LBB14_3: @@ -1540,34 +1540,34 @@ define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { ; RV32XTHEADBB-NEXT: .LBB14_6: ; RV32XTHEADBB-NEXT: srl a4, a1, a2 ; RV32XTHEADBB-NEXT: .LBB14_7: -; RV32XTHEADBB-NEXT: neg a7, a2 -; RV32XTHEADBB-NEXT: andi a6, a7, 63 -; RV32XTHEADBB-NEXT: bltu a6, a5, .LBB14_9 +; RV32XTHEADBB-NEXT: neg a6, a2 +; RV32XTHEADBB-NEXT: andi a2, a6, 63 +; RV32XTHEADBB-NEXT: bltu a2, a5, .LBB14_9 ; RV32XTHEADBB-NEXT: # %bb.8: -; RV32XTHEADBB-NEXT: li a2, 0 -; RV32XTHEADBB-NEXT: sll a0, a0, a6 -; RV32XTHEADBB-NEXT: bnez a6, .LBB14_10 +; RV32XTHEADBB-NEXT: li a5, 0 +; RV32XTHEADBB-NEXT: sll a0, a0, a2 +; RV32XTHEADBB-NEXT: bnez a2, .LBB14_10 ; RV32XTHEADBB-NEXT: j .LBB14_11 ; RV32XTHEADBB-NEXT: .LBB14_9: -; RV32XTHEADBB-NEXT: sll a2, a0, a7 -; RV32XTHEADBB-NEXT: neg a5, a6 -; RV32XTHEADBB-NEXT: srl a0, a0, a5 -; RV32XTHEADBB-NEXT: sll a5, a1, a7 -; RV32XTHEADBB-NEXT: or a0, a0, a5 -; RV32XTHEADBB-NEXT: beqz a6, .LBB14_11 +; RV32XTHEADBB-NEXT: neg a5, a2 +; RV32XTHEADBB-NEXT: srl a7, a0, a5 +; RV32XTHEADBB-NEXT: sll t0, 
a1, a6 +; RV32XTHEADBB-NEXT: sll a5, a0, a6 +; RV32XTHEADBB-NEXT: or a0, a7, t0 +; RV32XTHEADBB-NEXT: beqz a2, .LBB14_11 ; RV32XTHEADBB-NEXT: .LBB14_10: ; RV32XTHEADBB-NEXT: mv a1, a0 ; RV32XTHEADBB-NEXT: .LBB14_11: -; RV32XTHEADBB-NEXT: or a0, a3, a2 +; RV32XTHEADBB-NEXT: or a0, a3, a5 ; RV32XTHEADBB-NEXT: or a1, a4, a1 ; RV32XTHEADBB-NEXT: ret ; ; RV64XTHEADBB-LABEL: rotr_64_mask_and_127_and_63: ; RV64XTHEADBB: # %bb.0: -; RV64XTHEADBB-NEXT: srl a2, a0, a1 -; RV64XTHEADBB-NEXT: neg a1, a1 -; RV64XTHEADBB-NEXT: sll a0, a0, a1 -; RV64XTHEADBB-NEXT: or a0, a2, a0 +; RV64XTHEADBB-NEXT: neg a2, a1 +; RV64XTHEADBB-NEXT: srl a1, a0, a1 +; RV64XTHEADBB-NEXT: sll a0, a0, a2 +; RV64XTHEADBB-NEXT: or a0, a1, a0 ; RV64XTHEADBB-NEXT: ret %a = and i64 %y, 127 %b = lshr i64 %x, %a @@ -1589,10 +1589,10 @@ define i64 @rotr_64_mask_or_128_or_64(i64 %x, i64 %y) nounwind { ; RV32I-NEXT: bnez a2, .LBB15_3 ; RV32I-NEXT: j .LBB15_4 ; RV32I-NEXT: .LBB15_2: -; RV32I-NEXT: srl a4, a0, a2 -; RV32I-NEXT: neg a5, a2 -; RV32I-NEXT: sll a5, a1, a5 -; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: neg a4, a2 +; RV32I-NEXT: srl a5, a0, a2 +; RV32I-NEXT: sll a4, a1, a4 +; RV32I-NEXT: or a4, a5, a4 ; RV32I-NEXT: beqz a2, .LBB15_4 ; RV32I-NEXT: .LBB15_3: ; RV32I-NEXT: mv a0, a4 @@ -1621,10 +1621,10 @@ define i64 @rotr_64_mask_or_128_or_64(i64 %x, i64 %y) nounwind { ; RV32ZBB-NEXT: bnez a2, .LBB15_3 ; RV32ZBB-NEXT: j .LBB15_4 ; RV32ZBB-NEXT: .LBB15_2: -; RV32ZBB-NEXT: srl a4, a0, a2 -; RV32ZBB-NEXT: neg a5, a2 -; RV32ZBB-NEXT: sll a5, a1, a5 -; RV32ZBB-NEXT: or a4, a4, a5 +; RV32ZBB-NEXT: neg a4, a2 +; RV32ZBB-NEXT: srl a5, a0, a2 +; RV32ZBB-NEXT: sll a4, a1, a4 +; RV32ZBB-NEXT: or a4, a5, a4 ; RV32ZBB-NEXT: beqz a2, .LBB15_4 ; RV32ZBB-NEXT: .LBB15_3: ; RV32ZBB-NEXT: mv a0, a4 @@ -1653,10 +1653,10 @@ define i64 @rotr_64_mask_or_128_or_64(i64 %x, i64 %y) nounwind { ; RV32XTHEADBB-NEXT: bnez a2, .LBB15_3 ; RV32XTHEADBB-NEXT: j .LBB15_4 ; RV32XTHEADBB-NEXT: .LBB15_2: -; RV32XTHEADBB-NEXT: srl a4, a0, a2 -; 
RV32XTHEADBB-NEXT: neg a5, a2 -; RV32XTHEADBB-NEXT: sll a5, a1, a5 -; RV32XTHEADBB-NEXT: or a4, a4, a5 +; RV32XTHEADBB-NEXT: neg a4, a2 +; RV32XTHEADBB-NEXT: srl a5, a0, a2 +; RV32XTHEADBB-NEXT: sll a4, a1, a4 +; RV32XTHEADBB-NEXT: or a4, a5, a4 ; RV32XTHEADBB-NEXT: beqz a2, .LBB15_4 ; RV32XTHEADBB-NEXT: .LBB15_3: ; RV32XTHEADBB-NEXT: mv a0, a4 @@ -1689,8 +1689,8 @@ define signext i32 @rotl_32_mask_shared(i32 signext %a, i32 signext %b, i32 sign ; RV32I-LABEL: rotl_32_mask_shared: ; RV32I: # %bb.0: ; RV32I-NEXT: andi a3, a2, 31 -; RV32I-NEXT: sll a4, a0, a2 ; RV32I-NEXT: neg a3, a3 +; RV32I-NEXT: sll a4, a0, a2 ; RV32I-NEXT: srl a0, a0, a3 ; RV32I-NEXT: or a0, a4, a0 ; RV32I-NEXT: sll a1, a1, a2 @@ -1700,8 +1700,8 @@ define signext i32 @rotl_32_mask_shared(i32 signext %a, i32 signext %b, i32 sign ; RV64I-LABEL: rotl_32_mask_shared: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a3, a2, 31 -; RV64I-NEXT: sllw a4, a0, a2 ; RV64I-NEXT: neg a3, a3 +; RV64I-NEXT: sllw a4, a0, a2 ; RV64I-NEXT: srlw a0, a0, a3 ; RV64I-NEXT: or a0, a4, a0 ; RV64I-NEXT: sllw a1, a1, a2 @@ -1725,8 +1725,8 @@ define signext i32 @rotl_32_mask_shared(i32 signext %a, i32 signext %b, i32 sign ; RV32XTHEADBB-LABEL: rotl_32_mask_shared: ; RV32XTHEADBB: # %bb.0: ; RV32XTHEADBB-NEXT: andi a3, a2, 31 -; RV32XTHEADBB-NEXT: sll a4, a0, a2 ; RV32XTHEADBB-NEXT: neg a3, a3 +; RV32XTHEADBB-NEXT: sll a4, a0, a2 ; RV32XTHEADBB-NEXT: srl a0, a0, a3 ; RV32XTHEADBB-NEXT: or a0, a4, a0 ; RV32XTHEADBB-NEXT: sll a1, a1, a2 @@ -1736,8 +1736,8 @@ define signext i32 @rotl_32_mask_shared(i32 signext %a, i32 signext %b, i32 sign ; RV64XTHEADBB-LABEL: rotl_32_mask_shared: ; RV64XTHEADBB: # %bb.0: ; RV64XTHEADBB-NEXT: andi a3, a2, 31 -; RV64XTHEADBB-NEXT: sllw a4, a0, a2 ; RV64XTHEADBB-NEXT: neg a3, a3 +; RV64XTHEADBB-NEXT: sllw a4, a0, a2 ; RV64XTHEADBB-NEXT: srlw a0, a0, a3 ; RV64XTHEADBB-NEXT: or a0, a4, a0 ; RV64XTHEADBB-NEXT: sllw a1, a1, a2 @@ -1763,9 +1763,9 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 
signext %b, i64 sign ; RV32I-NEXT: sll t3, a0, a5 ; RV32I-NEXT: j .LBB17_3 ; RV32I-NEXT: .LBB17_2: -; RV32I-NEXT: sll a6, a0, a4 ; RV32I-NEXT: srl t1, a0, a7 ; RV32I-NEXT: sll t2, a1, a4 +; RV32I-NEXT: sll a6, a0, a4 ; RV32I-NEXT: or t3, t1, t2 ; RV32I-NEXT: .LBB17_3: ; RV32I-NEXT: neg t2, a5 @@ -1781,10 +1781,10 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV32I-NEXT: bnez t3, .LBB17_8 ; RV32I-NEXT: j .LBB17_9 ; RV32I-NEXT: .LBB17_7: -; RV32I-NEXT: srl t4, a0, t2 -; RV32I-NEXT: neg t5, t3 -; RV32I-NEXT: sll t5, a1, t5 -; RV32I-NEXT: or t4, t4, t5 +; RV32I-NEXT: neg t4, t3 +; RV32I-NEXT: srl t5, a0, t2 +; RV32I-NEXT: sll t4, a1, t4 +; RV32I-NEXT: or t4, t5, t4 ; RV32I-NEXT: beqz t3, .LBB17_9 ; RV32I-NEXT: .LBB17_8: ; RV32I-NEXT: mv a0, t4 @@ -1794,16 +1794,16 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: bgeu a5, t0, .LBB17_13 ; RV32I-NEXT: .LBB17_11: -; RV32I-NEXT: sll t0, a2, a4 -; RV32I-NEXT: srl a2, a2, a7 -; RV32I-NEXT: sll a4, a3, a4 -; RV32I-NEXT: or a2, a2, a4 +; RV32I-NEXT: srl a7, a2, a7 +; RV32I-NEXT: sll t0, a3, a4 +; RV32I-NEXT: sll a4, a2, a4 +; RV32I-NEXT: or a2, a7, t0 ; RV32I-NEXT: j .LBB17_14 ; RV32I-NEXT: .LBB17_12: ; RV32I-NEXT: srl a1, a1, t2 ; RV32I-NEXT: bltu a5, t0, .LBB17_11 ; RV32I-NEXT: .LBB17_13: -; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: sll a2, a2, a5 ; RV32I-NEXT: .LBB17_14: ; RV32I-NEXT: or a0, a6, a0 @@ -1812,8 +1812,8 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV32I-NEXT: # %bb.15: ; RV32I-NEXT: mv a3, a2 ; RV32I-NEXT: .LBB17_16: -; RV32I-NEXT: add a0, a0, t0 -; RV32I-NEXT: sltu a2, a0, t0 +; RV32I-NEXT: add a0, a0, a4 +; RV32I-NEXT: sltu a2, a0, a4 ; RV32I-NEXT: add a1, a1, a3 ; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: ret @@ -1821,8 +1821,8 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV64I-LABEL: 
rotl_64_mask_shared: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a3, a2, 63 -; RV64I-NEXT: sll a4, a0, a2 ; RV64I-NEXT: neg a3, a3 +; RV64I-NEXT: sll a4, a0, a2 ; RV64I-NEXT: srl a0, a0, a3 ; RV64I-NEXT: or a0, a4, a0 ; RV64I-NEXT: sll a1, a1, a2 @@ -1840,9 +1840,9 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV32ZBB-NEXT: sll t3, a0, a5 ; RV32ZBB-NEXT: j .LBB17_3 ; RV32ZBB-NEXT: .LBB17_2: -; RV32ZBB-NEXT: sll a6, a0, a4 ; RV32ZBB-NEXT: srl t1, a0, a7 ; RV32ZBB-NEXT: sll t2, a1, a4 +; RV32ZBB-NEXT: sll a6, a0, a4 ; RV32ZBB-NEXT: or t3, t1, t2 ; RV32ZBB-NEXT: .LBB17_3: ; RV32ZBB-NEXT: neg t2, a5 @@ -1858,10 +1858,10 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV32ZBB-NEXT: bnez t3, .LBB17_8 ; RV32ZBB-NEXT: j .LBB17_9 ; RV32ZBB-NEXT: .LBB17_7: -; RV32ZBB-NEXT: srl t4, a0, t2 -; RV32ZBB-NEXT: neg t5, t3 -; RV32ZBB-NEXT: sll t5, a1, t5 -; RV32ZBB-NEXT: or t4, t4, t5 +; RV32ZBB-NEXT: neg t4, t3 +; RV32ZBB-NEXT: srl t5, a0, t2 +; RV32ZBB-NEXT: sll t4, a1, t4 +; RV32ZBB-NEXT: or t4, t5, t4 ; RV32ZBB-NEXT: beqz t3, .LBB17_9 ; RV32ZBB-NEXT: .LBB17_8: ; RV32ZBB-NEXT: mv a0, t4 @@ -1871,16 +1871,16 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV32ZBB-NEXT: li a1, 0 ; RV32ZBB-NEXT: bgeu a5, t0, .LBB17_13 ; RV32ZBB-NEXT: .LBB17_11: -; RV32ZBB-NEXT: sll t0, a2, a4 -; RV32ZBB-NEXT: srl a2, a2, a7 -; RV32ZBB-NEXT: sll a4, a3, a4 -; RV32ZBB-NEXT: or a2, a2, a4 +; RV32ZBB-NEXT: srl a7, a2, a7 +; RV32ZBB-NEXT: sll t0, a3, a4 +; RV32ZBB-NEXT: sll a4, a2, a4 +; RV32ZBB-NEXT: or a2, a7, t0 ; RV32ZBB-NEXT: j .LBB17_14 ; RV32ZBB-NEXT: .LBB17_12: ; RV32ZBB-NEXT: srl a1, a1, t2 ; RV32ZBB-NEXT: bltu a5, t0, .LBB17_11 ; RV32ZBB-NEXT: .LBB17_13: -; RV32ZBB-NEXT: li t0, 0 +; RV32ZBB-NEXT: li a4, 0 ; RV32ZBB-NEXT: sll a2, a2, a5 ; RV32ZBB-NEXT: .LBB17_14: ; RV32ZBB-NEXT: or a0, a6, a0 @@ -1889,8 +1889,8 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 
sign ; RV32ZBB-NEXT: # %bb.15: ; RV32ZBB-NEXT: mv a3, a2 ; RV32ZBB-NEXT: .LBB17_16: -; RV32ZBB-NEXT: add a0, a0, t0 -; RV32ZBB-NEXT: sltu a2, a0, t0 +; RV32ZBB-NEXT: add a0, a0, a4 +; RV32ZBB-NEXT: sltu a2, a0, a4 ; RV32ZBB-NEXT: add a1, a1, a3 ; RV32ZBB-NEXT: add a1, a1, a2 ; RV32ZBB-NEXT: ret @@ -1913,9 +1913,9 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV32XTHEADBB-NEXT: sll t3, a0, a5 ; RV32XTHEADBB-NEXT: j .LBB17_3 ; RV32XTHEADBB-NEXT: .LBB17_2: -; RV32XTHEADBB-NEXT: sll a6, a0, a4 ; RV32XTHEADBB-NEXT: srl t1, a0, a7 ; RV32XTHEADBB-NEXT: sll t2, a1, a4 +; RV32XTHEADBB-NEXT: sll a6, a0, a4 ; RV32XTHEADBB-NEXT: or t3, t1, t2 ; RV32XTHEADBB-NEXT: .LBB17_3: ; RV32XTHEADBB-NEXT: neg t2, a5 @@ -1931,10 +1931,10 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV32XTHEADBB-NEXT: bnez t3, .LBB17_8 ; RV32XTHEADBB-NEXT: j .LBB17_9 ; RV32XTHEADBB-NEXT: .LBB17_7: -; RV32XTHEADBB-NEXT: srl t4, a0, t2 -; RV32XTHEADBB-NEXT: neg t5, t3 -; RV32XTHEADBB-NEXT: sll t5, a1, t5 -; RV32XTHEADBB-NEXT: or t4, t4, t5 +; RV32XTHEADBB-NEXT: neg t4, t3 +; RV32XTHEADBB-NEXT: srl t5, a0, t2 +; RV32XTHEADBB-NEXT: sll t4, a1, t4 +; RV32XTHEADBB-NEXT: or t4, t5, t4 ; RV32XTHEADBB-NEXT: beqz t3, .LBB17_9 ; RV32XTHEADBB-NEXT: .LBB17_8: ; RV32XTHEADBB-NEXT: mv a0, t4 @@ -1944,16 +1944,16 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV32XTHEADBB-NEXT: li a1, 0 ; RV32XTHEADBB-NEXT: bgeu a5, t0, .LBB17_13 ; RV32XTHEADBB-NEXT: .LBB17_11: -; RV32XTHEADBB-NEXT: sll t0, a2, a4 -; RV32XTHEADBB-NEXT: srl a2, a2, a7 -; RV32XTHEADBB-NEXT: sll a4, a3, a4 -; RV32XTHEADBB-NEXT: or a2, a2, a4 +; RV32XTHEADBB-NEXT: srl a7, a2, a7 +; RV32XTHEADBB-NEXT: sll t0, a3, a4 +; RV32XTHEADBB-NEXT: sll a4, a2, a4 +; RV32XTHEADBB-NEXT: or a2, a7, t0 ; RV32XTHEADBB-NEXT: j .LBB17_14 ; RV32XTHEADBB-NEXT: .LBB17_12: ; RV32XTHEADBB-NEXT: srl a1, a1, t2 ; RV32XTHEADBB-NEXT: bltu a5, t0, .LBB17_11 ; 
RV32XTHEADBB-NEXT: .LBB17_13: -; RV32XTHEADBB-NEXT: li t0, 0 +; RV32XTHEADBB-NEXT: li a4, 0 ; RV32XTHEADBB-NEXT: sll a2, a2, a5 ; RV32XTHEADBB-NEXT: .LBB17_14: ; RV32XTHEADBB-NEXT: or a0, a6, a0 @@ -1962,8 +1962,8 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV32XTHEADBB-NEXT: # %bb.15: ; RV32XTHEADBB-NEXT: mv a3, a2 ; RV32XTHEADBB-NEXT: .LBB17_16: -; RV32XTHEADBB-NEXT: add a0, a0, t0 -; RV32XTHEADBB-NEXT: sltu a2, a0, t0 +; RV32XTHEADBB-NEXT: add a0, a0, a4 +; RV32XTHEADBB-NEXT: sltu a2, a0, a4 ; RV32XTHEADBB-NEXT: add a1, a1, a3 ; RV32XTHEADBB-NEXT: add a1, a1, a2 ; RV32XTHEADBB-NEXT: ret @@ -1971,8 +1971,8 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV64XTHEADBB-LABEL: rotl_64_mask_shared: ; RV64XTHEADBB: # %bb.0: ; RV64XTHEADBB-NEXT: andi a3, a2, 63 -; RV64XTHEADBB-NEXT: sll a4, a0, a2 ; RV64XTHEADBB-NEXT: neg a3, a3 +; RV64XTHEADBB-NEXT: sll a4, a0, a2 ; RV64XTHEADBB-NEXT: srl a0, a0, a3 ; RV64XTHEADBB-NEXT: or a0, a4, a0 ; RV64XTHEADBB-NEXT: sll a1, a1, a2 @@ -1990,8 +1990,8 @@ define signext i32 @rotr_32_mask_shared(i32 signext %a, i32 signext %b, i32 sign ; RV32I-LABEL: rotr_32_mask_shared: ; RV32I: # %bb.0: ; RV32I-NEXT: andi a3, a2, 31 -; RV32I-NEXT: srl a4, a0, a2 ; RV32I-NEXT: neg a3, a3 +; RV32I-NEXT: srl a4, a0, a2 ; RV32I-NEXT: sll a0, a0, a3 ; RV32I-NEXT: or a0, a4, a0 ; RV32I-NEXT: sll a1, a1, a2 @@ -2001,8 +2001,8 @@ define signext i32 @rotr_32_mask_shared(i32 signext %a, i32 signext %b, i32 sign ; RV64I-LABEL: rotr_32_mask_shared: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a3, a2, 31 -; RV64I-NEXT: srlw a4, a0, a2 ; RV64I-NEXT: neg a3, a3 +; RV64I-NEXT: srlw a4, a0, a2 ; RV64I-NEXT: sllw a0, a0, a3 ; RV64I-NEXT: or a0, a4, a0 ; RV64I-NEXT: sllw a1, a1, a2 @@ -2026,8 +2026,8 @@ define signext i32 @rotr_32_mask_shared(i32 signext %a, i32 signext %b, i32 sign ; RV32XTHEADBB-LABEL: rotr_32_mask_shared: ; RV32XTHEADBB: # %bb.0: ; RV32XTHEADBB-NEXT: andi a3, a2, 31 -; 
RV32XTHEADBB-NEXT: srl a4, a0, a2 ; RV32XTHEADBB-NEXT: neg a3, a3 +; RV32XTHEADBB-NEXT: srl a4, a0, a2 ; RV32XTHEADBB-NEXT: sll a0, a0, a3 ; RV32XTHEADBB-NEXT: or a0, a4, a0 ; RV32XTHEADBB-NEXT: sll a1, a1, a2 @@ -2037,8 +2037,8 @@ define signext i32 @rotr_32_mask_shared(i32 signext %a, i32 signext %b, i32 sign ; RV64XTHEADBB-LABEL: rotr_32_mask_shared: ; RV64XTHEADBB: # %bb.0: ; RV64XTHEADBB-NEXT: andi a3, a2, 31 -; RV64XTHEADBB-NEXT: srlw a4, a0, a2 ; RV64XTHEADBB-NEXT: neg a3, a3 +; RV64XTHEADBB-NEXT: srlw a4, a0, a2 ; RV64XTHEADBB-NEXT: sllw a0, a0, a3 ; RV64XTHEADBB-NEXT: or a0, a4, a0 ; RV64XTHEADBB-NEXT: sllw a1, a1, a2 @@ -2057,39 +2057,39 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV32I: # %bb.0: ; RV32I-NEXT: andi a5, a4, 63 ; RV32I-NEXT: li t0, 32 -; RV32I-NEXT: neg a6, a5 +; RV32I-NEXT: neg a7, a5 ; RV32I-NEXT: bltu a5, t0, .LBB19_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: srl t1, a1, a5 -; RV32I-NEXT: mv a7, a0 +; RV32I-NEXT: mv a6, a0 ; RV32I-NEXT: bnez a5, .LBB19_3 ; RV32I-NEXT: j .LBB19_4 ; RV32I-NEXT: .LBB19_2: -; RV32I-NEXT: srl a7, a0, a4 -; RV32I-NEXT: sll t1, a1, a6 -; RV32I-NEXT: or t1, a7, t1 -; RV32I-NEXT: mv a7, a0 +; RV32I-NEXT: srl a6, a0, a4 +; RV32I-NEXT: sll t1, a1, a7 +; RV32I-NEXT: or t1, a6, t1 +; RV32I-NEXT: mv a6, a0 ; RV32I-NEXT: beqz a5, .LBB19_4 ; RV32I-NEXT: .LBB19_3: -; RV32I-NEXT: mv a7, t1 +; RV32I-NEXT: mv a6, t1 ; RV32I-NEXT: .LBB19_4: -; RV32I-NEXT: neg t4, a5 +; RV32I-NEXT: neg t2, a5 ; RV32I-NEXT: bltu a5, t0, .LBB19_7 ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: li t1, 0 -; RV32I-NEXT: andi t3, t4, 63 +; RV32I-NEXT: andi t3, t2, 63 ; RV32I-NEXT: bgeu t3, t0, .LBB19_8 ; RV32I-NEXT: .LBB19_6: -; RV32I-NEXT: sll t2, a0, t4 -; RV32I-NEXT: neg t5, t3 -; RV32I-NEXT: srl a0, a0, t5 -; RV32I-NEXT: sll t4, a1, t4 -; RV32I-NEXT: or a0, a0, t4 +; RV32I-NEXT: neg t4, t3 +; RV32I-NEXT: srl t4, a0, t4 +; RV32I-NEXT: sll t5, a1, t2 +; RV32I-NEXT: sll t2, a0, t2 +; RV32I-NEXT: or a0, t4, t5 ; 
RV32I-NEXT: bnez t3, .LBB19_9 ; RV32I-NEXT: j .LBB19_10 ; RV32I-NEXT: .LBB19_7: ; RV32I-NEXT: srl t1, a1, a4 -; RV32I-NEXT: andi t3, t4, 63 +; RV32I-NEXT: andi t3, t2, 63 ; RV32I-NEXT: bltu t3, t0, .LBB19_6 ; RV32I-NEXT: .LBB19_8: ; RV32I-NEXT: li t2, 0 @@ -2100,23 +2100,23 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV32I-NEXT: .LBB19_10: ; RV32I-NEXT: bltu a5, t0, .LBB19_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: sll a0, a2, a5 ; RV32I-NEXT: j .LBB19_13 ; RV32I-NEXT: .LBB19_12: -; RV32I-NEXT: sll t0, a2, a4 -; RV32I-NEXT: srl a0, a2, a6 -; RV32I-NEXT: sll a2, a3, a4 -; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: srl a0, a2, a7 +; RV32I-NEXT: sll a7, a3, a4 +; RV32I-NEXT: sll a4, a2, a4 +; RV32I-NEXT: or a0, a0, a7 ; RV32I-NEXT: .LBB19_13: -; RV32I-NEXT: or a2, a7, t2 +; RV32I-NEXT: or a2, a6, t2 ; RV32I-NEXT: or a1, t1, a1 ; RV32I-NEXT: beqz a5, .LBB19_15 ; RV32I-NEXT: # %bb.14: ; RV32I-NEXT: mv a3, a0 ; RV32I-NEXT: .LBB19_15: -; RV32I-NEXT: add a0, a2, t0 -; RV32I-NEXT: sltu a2, a0, t0 +; RV32I-NEXT: add a0, a2, a4 +; RV32I-NEXT: sltu a2, a0, a4 ; RV32I-NEXT: add a1, a1, a3 ; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: ret @@ -2124,8 +2124,8 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV64I-LABEL: rotr_64_mask_shared: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a3, a2, 63 -; RV64I-NEXT: srl a4, a0, a2 ; RV64I-NEXT: neg a3, a3 +; RV64I-NEXT: srl a4, a0, a2 ; RV64I-NEXT: sll a0, a0, a3 ; RV64I-NEXT: or a0, a4, a0 ; RV64I-NEXT: sll a1, a1, a2 @@ -2136,39 +2136,39 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: andi a5, a4, 63 ; RV32ZBB-NEXT: li t0, 32 -; RV32ZBB-NEXT: neg a6, a5 +; RV32ZBB-NEXT: neg a7, a5 ; RV32ZBB-NEXT: bltu a5, t0, .LBB19_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: srl t1, a1, a5 -; RV32ZBB-NEXT: mv a7, a0 +; RV32ZBB-NEXT: mv a6, a0 ; RV32ZBB-NEXT: bnez a5, 
.LBB19_3 ; RV32ZBB-NEXT: j .LBB19_4 ; RV32ZBB-NEXT: .LBB19_2: -; RV32ZBB-NEXT: srl a7, a0, a4 -; RV32ZBB-NEXT: sll t1, a1, a6 -; RV32ZBB-NEXT: or t1, a7, t1 -; RV32ZBB-NEXT: mv a7, a0 +; RV32ZBB-NEXT: srl a6, a0, a4 +; RV32ZBB-NEXT: sll t1, a1, a7 +; RV32ZBB-NEXT: or t1, a6, t1 +; RV32ZBB-NEXT: mv a6, a0 ; RV32ZBB-NEXT: beqz a5, .LBB19_4 ; RV32ZBB-NEXT: .LBB19_3: -; RV32ZBB-NEXT: mv a7, t1 +; RV32ZBB-NEXT: mv a6, t1 ; RV32ZBB-NEXT: .LBB19_4: -; RV32ZBB-NEXT: neg t4, a5 +; RV32ZBB-NEXT: neg t2, a5 ; RV32ZBB-NEXT: bltu a5, t0, .LBB19_7 ; RV32ZBB-NEXT: # %bb.5: ; RV32ZBB-NEXT: li t1, 0 -; RV32ZBB-NEXT: andi t3, t4, 63 +; RV32ZBB-NEXT: andi t3, t2, 63 ; RV32ZBB-NEXT: bgeu t3, t0, .LBB19_8 ; RV32ZBB-NEXT: .LBB19_6: -; RV32ZBB-NEXT: sll t2, a0, t4 -; RV32ZBB-NEXT: neg t5, t3 -; RV32ZBB-NEXT: srl a0, a0, t5 -; RV32ZBB-NEXT: sll t4, a1, t4 -; RV32ZBB-NEXT: or a0, a0, t4 +; RV32ZBB-NEXT: neg t4, t3 +; RV32ZBB-NEXT: srl t4, a0, t4 +; RV32ZBB-NEXT: sll t5, a1, t2 +; RV32ZBB-NEXT: sll t2, a0, t2 +; RV32ZBB-NEXT: or a0, t4, t5 ; RV32ZBB-NEXT: bnez t3, .LBB19_9 ; RV32ZBB-NEXT: j .LBB19_10 ; RV32ZBB-NEXT: .LBB19_7: ; RV32ZBB-NEXT: srl t1, a1, a4 -; RV32ZBB-NEXT: andi t3, t4, 63 +; RV32ZBB-NEXT: andi t3, t2, 63 ; RV32ZBB-NEXT: bltu t3, t0, .LBB19_6 ; RV32ZBB-NEXT: .LBB19_8: ; RV32ZBB-NEXT: li t2, 0 @@ -2179,23 +2179,23 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV32ZBB-NEXT: .LBB19_10: ; RV32ZBB-NEXT: bltu a5, t0, .LBB19_12 ; RV32ZBB-NEXT: # %bb.11: -; RV32ZBB-NEXT: li t0, 0 +; RV32ZBB-NEXT: li a4, 0 ; RV32ZBB-NEXT: sll a0, a2, a5 ; RV32ZBB-NEXT: j .LBB19_13 ; RV32ZBB-NEXT: .LBB19_12: -; RV32ZBB-NEXT: sll t0, a2, a4 -; RV32ZBB-NEXT: srl a0, a2, a6 -; RV32ZBB-NEXT: sll a2, a3, a4 -; RV32ZBB-NEXT: or a0, a0, a2 +; RV32ZBB-NEXT: srl a0, a2, a7 +; RV32ZBB-NEXT: sll a7, a3, a4 +; RV32ZBB-NEXT: sll a4, a2, a4 +; RV32ZBB-NEXT: or a0, a0, a7 ; RV32ZBB-NEXT: .LBB19_13: -; RV32ZBB-NEXT: or a2, a7, t2 +; RV32ZBB-NEXT: or a2, a6, t2 ; RV32ZBB-NEXT: 
or a1, t1, a1 ; RV32ZBB-NEXT: beqz a5, .LBB19_15 ; RV32ZBB-NEXT: # %bb.14: ; RV32ZBB-NEXT: mv a3, a0 ; RV32ZBB-NEXT: .LBB19_15: -; RV32ZBB-NEXT: add a0, a2, t0 -; RV32ZBB-NEXT: sltu a2, a0, t0 +; RV32ZBB-NEXT: add a0, a2, a4 +; RV32ZBB-NEXT: sltu a2, a0, a4 ; RV32ZBB-NEXT: add a1, a1, a3 ; RV32ZBB-NEXT: add a1, a1, a2 ; RV32ZBB-NEXT: ret @@ -2211,39 +2211,39 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV32XTHEADBB: # %bb.0: ; RV32XTHEADBB-NEXT: andi a5, a4, 63 ; RV32XTHEADBB-NEXT: li t0, 32 -; RV32XTHEADBB-NEXT: neg a6, a5 +; RV32XTHEADBB-NEXT: neg a7, a5 ; RV32XTHEADBB-NEXT: bltu a5, t0, .LBB19_2 ; RV32XTHEADBB-NEXT: # %bb.1: ; RV32XTHEADBB-NEXT: srl t1, a1, a5 -; RV32XTHEADBB-NEXT: mv a7, a0 +; RV32XTHEADBB-NEXT: mv a6, a0 ; RV32XTHEADBB-NEXT: bnez a5, .LBB19_3 ; RV32XTHEADBB-NEXT: j .LBB19_4 ; RV32XTHEADBB-NEXT: .LBB19_2: -; RV32XTHEADBB-NEXT: srl a7, a0, a4 -; RV32XTHEADBB-NEXT: sll t1, a1, a6 -; RV32XTHEADBB-NEXT: or t1, a7, t1 -; RV32XTHEADBB-NEXT: mv a7, a0 +; RV32XTHEADBB-NEXT: srl a6, a0, a4 +; RV32XTHEADBB-NEXT: sll t1, a1, a7 +; RV32XTHEADBB-NEXT: or t1, a6, t1 +; RV32XTHEADBB-NEXT: mv a6, a0 ; RV32XTHEADBB-NEXT: beqz a5, .LBB19_4 ; RV32XTHEADBB-NEXT: .LBB19_3: -; RV32XTHEADBB-NEXT: mv a7, t1 +; RV32XTHEADBB-NEXT: mv a6, t1 ; RV32XTHEADBB-NEXT: .LBB19_4: -; RV32XTHEADBB-NEXT: neg t4, a5 +; RV32XTHEADBB-NEXT: neg t2, a5 ; RV32XTHEADBB-NEXT: bltu a5, t0, .LBB19_7 ; RV32XTHEADBB-NEXT: # %bb.5: ; RV32XTHEADBB-NEXT: li t1, 0 -; RV32XTHEADBB-NEXT: andi t3, t4, 63 +; RV32XTHEADBB-NEXT: andi t3, t2, 63 ; RV32XTHEADBB-NEXT: bgeu t3, t0, .LBB19_8 ; RV32XTHEADBB-NEXT: .LBB19_6: -; RV32XTHEADBB-NEXT: sll t2, a0, t4 -; RV32XTHEADBB-NEXT: neg t5, t3 -; RV32XTHEADBB-NEXT: srl a0, a0, t5 -; RV32XTHEADBB-NEXT: sll t4, a1, t4 -; RV32XTHEADBB-NEXT: or a0, a0, t4 +; RV32XTHEADBB-NEXT: neg t4, t3 +; RV32XTHEADBB-NEXT: srl t4, a0, t4 +; RV32XTHEADBB-NEXT: sll t5, a1, t2 +; RV32XTHEADBB-NEXT: sll t2, a0, t2 +; RV32XTHEADBB-NEXT: or a0, 
t4, t5 ; RV32XTHEADBB-NEXT: bnez t3, .LBB19_9 ; RV32XTHEADBB-NEXT: j .LBB19_10 ; RV32XTHEADBB-NEXT: .LBB19_7: ; RV32XTHEADBB-NEXT: srl t1, a1, a4 -; RV32XTHEADBB-NEXT: andi t3, t4, 63 +; RV32XTHEADBB-NEXT: andi t3, t2, 63 ; RV32XTHEADBB-NEXT: bltu t3, t0, .LBB19_6 ; RV32XTHEADBB-NEXT: .LBB19_8: ; RV32XTHEADBB-NEXT: li t2, 0 @@ -2254,23 +2254,23 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV32XTHEADBB-NEXT: .LBB19_10: ; RV32XTHEADBB-NEXT: bltu a5, t0, .LBB19_12 ; RV32XTHEADBB-NEXT: # %bb.11: -; RV32XTHEADBB-NEXT: li t0, 0 +; RV32XTHEADBB-NEXT: li a4, 0 ; RV32XTHEADBB-NEXT: sll a0, a2, a5 ; RV32XTHEADBB-NEXT: j .LBB19_13 ; RV32XTHEADBB-NEXT: .LBB19_12: -; RV32XTHEADBB-NEXT: sll t0, a2, a4 -; RV32XTHEADBB-NEXT: srl a0, a2, a6 -; RV32XTHEADBB-NEXT: sll a2, a3, a4 -; RV32XTHEADBB-NEXT: or a0, a0, a2 +; RV32XTHEADBB-NEXT: srl a0, a2, a7 +; RV32XTHEADBB-NEXT: sll a7, a3, a4 +; RV32XTHEADBB-NEXT: sll a4, a2, a4 +; RV32XTHEADBB-NEXT: or a0, a0, a7 ; RV32XTHEADBB-NEXT: .LBB19_13: -; RV32XTHEADBB-NEXT: or a2, a7, t2 +; RV32XTHEADBB-NEXT: or a2, a6, t2 ; RV32XTHEADBB-NEXT: or a1, t1, a1 ; RV32XTHEADBB-NEXT: beqz a5, .LBB19_15 ; RV32XTHEADBB-NEXT: # %bb.14: ; RV32XTHEADBB-NEXT: mv a3, a0 ; RV32XTHEADBB-NEXT: .LBB19_15: -; RV32XTHEADBB-NEXT: add a0, a2, t0 -; RV32XTHEADBB-NEXT: sltu a2, a0, t0 +; RV32XTHEADBB-NEXT: add a0, a2, a4 +; RV32XTHEADBB-NEXT: sltu a2, a0, a4 ; RV32XTHEADBB-NEXT: add a1, a1, a3 ; RV32XTHEADBB-NEXT: add a1, a1, a2 ; RV32XTHEADBB-NEXT: ret @@ -2278,8 +2278,8 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV64XTHEADBB-LABEL: rotr_64_mask_shared: ; RV64XTHEADBB: # %bb.0: ; RV64XTHEADBB-NEXT: andi a3, a2, 63 -; RV64XTHEADBB-NEXT: srl a4, a0, a2 ; RV64XTHEADBB-NEXT: neg a3, a3 +; RV64XTHEADBB-NEXT: srl a4, a0, a2 ; RV64XTHEADBB-NEXT: sll a0, a0, a3 ; RV64XTHEADBB-NEXT: or a0, a4, a0 ; RV64XTHEADBB-NEXT: sll a1, a1, a2 @@ -2297,10 +2297,10 @@ define signext i32 
@rotl_32_mask_multiple(i32 signext %a, i32 signext %b, i32 si ; RV32I-LABEL: rotl_32_mask_multiple: ; RV32I: # %bb.0: ; RV32I-NEXT: andi a3, a2, 31 -; RV32I-NEXT: sll a4, a0, a2 -; RV32I-NEXT: sll a2, a1, a2 ; RV32I-NEXT: neg a3, a3 +; RV32I-NEXT: sll a4, a0, a2 ; RV32I-NEXT: srl a0, a0, a3 +; RV32I-NEXT: sll a2, a1, a2 ; RV32I-NEXT: srl a1, a1, a3 ; RV32I-NEXT: or a0, a4, a0 ; RV32I-NEXT: or a1, a2, a1 @@ -2311,10 +2311,10 @@ define signext i32 @rotl_32_mask_multiple(i32 signext %a, i32 signext %b, i32 si ; RV64I: # %bb.0: ; RV64I-NEXT: andi a3, a2, 31 ; RV64I-NEXT: sllw a4, a0, a2 -; RV64I-NEXT: sllw a2, a1, a2 ; RV64I-NEXT: neg a5, a3 ; RV64I-NEXT: neg a3, a3 ; RV64I-NEXT: srlw a0, a0, a5 +; RV64I-NEXT: sllw a2, a1, a2 ; RV64I-NEXT: srlw a1, a1, a3 ; RV64I-NEXT: or a0, a4, a0 ; RV64I-NEXT: or a1, a2, a1 @@ -2338,10 +2338,10 @@ define signext i32 @rotl_32_mask_multiple(i32 signext %a, i32 signext %b, i32 si ; RV32XTHEADBB-LABEL: rotl_32_mask_multiple: ; RV32XTHEADBB: # %bb.0: ; RV32XTHEADBB-NEXT: andi a3, a2, 31 -; RV32XTHEADBB-NEXT: sll a4, a0, a2 -; RV32XTHEADBB-NEXT: sll a2, a1, a2 ; RV32XTHEADBB-NEXT: neg a3, a3 +; RV32XTHEADBB-NEXT: sll a4, a0, a2 ; RV32XTHEADBB-NEXT: srl a0, a0, a3 +; RV32XTHEADBB-NEXT: sll a2, a1, a2 ; RV32XTHEADBB-NEXT: srl a1, a1, a3 ; RV32XTHEADBB-NEXT: or a0, a4, a0 ; RV32XTHEADBB-NEXT: or a1, a2, a1 @@ -2352,10 +2352,10 @@ define signext i32 @rotl_32_mask_multiple(i32 signext %a, i32 signext %b, i32 si ; RV64XTHEADBB: # %bb.0: ; RV64XTHEADBB-NEXT: andi a3, a2, 31 ; RV64XTHEADBB-NEXT: sllw a4, a0, a2 -; RV64XTHEADBB-NEXT: sllw a2, a1, a2 ; RV64XTHEADBB-NEXT: neg a5, a3 ; RV64XTHEADBB-NEXT: neg a3, a3 ; RV64XTHEADBB-NEXT: srlw a0, a0, a5 +; RV64XTHEADBB-NEXT: sllw a2, a1, a2 ; RV64XTHEADBB-NEXT: srlw a1, a1, a3 ; RV64XTHEADBB-NEXT: or a0, a4, a0 ; RV64XTHEADBB-NEXT: or a1, a2, a1 @@ -2380,9 +2380,9 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { ; RV32I-NEXT: sll t2, a0, t1 ; RV32I-NEXT: j .LBB21_3 ; 
RV32I-NEXT: .LBB21_2: -; RV32I-NEXT: sll a6, a0, a4 ; RV32I-NEXT: srl a7, a0, t3 ; RV32I-NEXT: sll t0, a1, a4 +; RV32I-NEXT: sll a6, a0, a4 ; RV32I-NEXT: or t2, a7, t0 ; RV32I-NEXT: .LBB21_3: ; RV32I-NEXT: neg a7, t1 @@ -2408,26 +2408,26 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { ; RV32I-NEXT: .LBB21_9: ; RV32I-NEXT: bltu t2, a5, .LBB21_12 ; RV32I-NEXT: # %bb.10: -; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: bgeu t1, a5, .LBB21_13 ; RV32I-NEXT: .LBB21_11: -; RV32I-NEXT: sll a1, a2, a4 ; RV32I-NEXT: srl t3, a2, t3 -; RV32I-NEXT: sll a4, a3, a4 -; RV32I-NEXT: or t3, t3, a4 -; RV32I-NEXT: mv a4, a3 +; RV32I-NEXT: sll t5, a3, a4 +; RV32I-NEXT: sll a4, a2, a4 +; RV32I-NEXT: or t5, t3, t5 +; RV32I-NEXT: mv t3, a3 ; RV32I-NEXT: bnez t1, .LBB21_14 ; RV32I-NEXT: j .LBB21_15 ; RV32I-NEXT: .LBB21_12: -; RV32I-NEXT: srl t5, a1, a7 +; RV32I-NEXT: srl a1, a1, a7 ; RV32I-NEXT: bltu t1, a5, .LBB21_11 ; RV32I-NEXT: .LBB21_13: -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: sll t3, a2, t1 -; RV32I-NEXT: mv a4, a3 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: sll t5, a2, t1 +; RV32I-NEXT: mv t3, a3 ; RV32I-NEXT: beqz t1, .LBB21_15 ; RV32I-NEXT: .LBB21_14: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t3, t5 ; RV32I-NEXT: .LBB21_15: ; RV32I-NEXT: bltu t2, a5, .LBB21_17 ; RV32I-NEXT: # %bb.16: @@ -2436,14 +2436,14 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { ; RV32I-NEXT: j .LBB21_19 ; RV32I-NEXT: .LBB21_17: ; RV32I-NEXT: srl t1, a2, a7 -; RV32I-NEXT: sll t3, a3, t4 -; RV32I-NEXT: or t1, t1, t3 +; RV32I-NEXT: sll t4, a3, t4 +; RV32I-NEXT: or t1, t1, t4 ; RV32I-NEXT: beqz t2, .LBB21_19 ; RV32I-NEXT: .LBB21_18: ; RV32I-NEXT: mv a2, t1 ; RV32I-NEXT: .LBB21_19: ; RV32I-NEXT: or a0, a6, a0 -; RV32I-NEXT: or a6, t0, t5 +; RV32I-NEXT: or a1, t0, a1 ; RV32I-NEXT: bltu t2, a5, .LBB21_21 ; RV32I-NEXT: # %bb.20: ; RV32I-NEXT: li a3, 0 @@ -2451,21 +2451,21 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { ; 
RV32I-NEXT: .LBB21_21: ; RV32I-NEXT: srl a3, a3, a7 ; RV32I-NEXT: .LBB21_22: -; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: sltu a1, a0, a1 -; RV32I-NEXT: add a3, a6, a3 -; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: or a2, a4, a2 +; RV32I-NEXT: or a3, t3, a3 +; RV32I-NEXT: add a0, a0, a2 +; RV32I-NEXT: sltu a2, a0, a2 +; RV32I-NEXT: add a1, a1, a3 +; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: rotl_64_mask_multiple: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a3, a2, 63 -; RV64I-NEXT: sll a4, a0, a2 -; RV64I-NEXT: sll a2, a1, a2 ; RV64I-NEXT: neg a3, a3 +; RV64I-NEXT: sll a4, a0, a2 ; RV64I-NEXT: srl a0, a0, a3 +; RV64I-NEXT: sll a2, a1, a2 ; RV64I-NEXT: srl a1, a1, a3 ; RV64I-NEXT: or a0, a4, a0 ; RV64I-NEXT: or a1, a2, a1 @@ -2483,9 +2483,9 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { ; RV32ZBB-NEXT: sll t2, a0, t1 ; RV32ZBB-NEXT: j .LBB21_3 ; RV32ZBB-NEXT: .LBB21_2: -; RV32ZBB-NEXT: sll a6, a0, a4 ; RV32ZBB-NEXT: srl a7, a0, t3 ; RV32ZBB-NEXT: sll t0, a1, a4 +; RV32ZBB-NEXT: sll a6, a0, a4 ; RV32ZBB-NEXT: or t2, a7, t0 ; RV32ZBB-NEXT: .LBB21_3: ; RV32ZBB-NEXT: neg a7, t1 @@ -2511,26 +2511,26 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { ; RV32ZBB-NEXT: .LBB21_9: ; RV32ZBB-NEXT: bltu t2, a5, .LBB21_12 ; RV32ZBB-NEXT: # %bb.10: -; RV32ZBB-NEXT: li t5, 0 +; RV32ZBB-NEXT: li a1, 0 ; RV32ZBB-NEXT: bgeu t1, a5, .LBB21_13 ; RV32ZBB-NEXT: .LBB21_11: -; RV32ZBB-NEXT: sll a1, a2, a4 ; RV32ZBB-NEXT: srl t3, a2, t3 -; RV32ZBB-NEXT: sll a4, a3, a4 -; RV32ZBB-NEXT: or t3, t3, a4 -; RV32ZBB-NEXT: mv a4, a3 +; RV32ZBB-NEXT: sll t5, a3, a4 +; RV32ZBB-NEXT: sll a4, a2, a4 +; RV32ZBB-NEXT: or t5, t3, t5 +; RV32ZBB-NEXT: mv t3, a3 ; RV32ZBB-NEXT: bnez t1, .LBB21_14 ; RV32ZBB-NEXT: j .LBB21_15 ; RV32ZBB-NEXT: .LBB21_12: -; RV32ZBB-NEXT: srl t5, a1, a7 +; RV32ZBB-NEXT: srl a1, a1, a7 ; RV32ZBB-NEXT: bltu t1, a5, .LBB21_11 ; RV32ZBB-NEXT: .LBB21_13: -; 
RV32ZBB-NEXT: li a1, 0 -; RV32ZBB-NEXT: sll t3, a2, t1 -; RV32ZBB-NEXT: mv a4, a3 +; RV32ZBB-NEXT: li a4, 0 +; RV32ZBB-NEXT: sll t5, a2, t1 +; RV32ZBB-NEXT: mv t3, a3 ; RV32ZBB-NEXT: beqz t1, .LBB21_15 ; RV32ZBB-NEXT: .LBB21_14: -; RV32ZBB-NEXT: mv a4, t3 +; RV32ZBB-NEXT: mv t3, t5 ; RV32ZBB-NEXT: .LBB21_15: ; RV32ZBB-NEXT: bltu t2, a5, .LBB21_17 ; RV32ZBB-NEXT: # %bb.16: @@ -2539,14 +2539,14 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { ; RV32ZBB-NEXT: j .LBB21_19 ; RV32ZBB-NEXT: .LBB21_17: ; RV32ZBB-NEXT: srl t1, a2, a7 -; RV32ZBB-NEXT: sll t3, a3, t4 -; RV32ZBB-NEXT: or t1, t1, t3 +; RV32ZBB-NEXT: sll t4, a3, t4 +; RV32ZBB-NEXT: or t1, t1, t4 ; RV32ZBB-NEXT: beqz t2, .LBB21_19 ; RV32ZBB-NEXT: .LBB21_18: ; RV32ZBB-NEXT: mv a2, t1 ; RV32ZBB-NEXT: .LBB21_19: ; RV32ZBB-NEXT: or a0, a6, a0 -; RV32ZBB-NEXT: or a6, t0, t5 +; RV32ZBB-NEXT: or a1, t0, a1 ; RV32ZBB-NEXT: bltu t2, a5, .LBB21_21 ; RV32ZBB-NEXT: # %bb.20: ; RV32ZBB-NEXT: li a3, 0 @@ -2554,12 +2554,12 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { ; RV32ZBB-NEXT: .LBB21_21: ; RV32ZBB-NEXT: srl a3, a3, a7 ; RV32ZBB-NEXT: .LBB21_22: -; RV32ZBB-NEXT: or a1, a1, a2 -; RV32ZBB-NEXT: or a3, a4, a3 -; RV32ZBB-NEXT: add a0, a0, a1 -; RV32ZBB-NEXT: sltu a1, a0, a1 -; RV32ZBB-NEXT: add a3, a6, a3 -; RV32ZBB-NEXT: add a1, a3, a1 +; RV32ZBB-NEXT: or a2, a4, a2 +; RV32ZBB-NEXT: or a3, t3, a3 +; RV32ZBB-NEXT: add a0, a0, a2 +; RV32ZBB-NEXT: sltu a2, a0, a2 +; RV32ZBB-NEXT: add a1, a1, a3 +; RV32ZBB-NEXT: add a1, a1, a2 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: rotl_64_mask_multiple: @@ -2580,9 +2580,9 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { ; RV32XTHEADBB-NEXT: sll t2, a0, t1 ; RV32XTHEADBB-NEXT: j .LBB21_3 ; RV32XTHEADBB-NEXT: .LBB21_2: -; RV32XTHEADBB-NEXT: sll a6, a0, a4 ; RV32XTHEADBB-NEXT: srl a7, a0, t3 ; RV32XTHEADBB-NEXT: sll t0, a1, a4 +; RV32XTHEADBB-NEXT: sll a6, a0, a4 ; RV32XTHEADBB-NEXT: or t2, a7, t0 ; RV32XTHEADBB-NEXT: 
.LBB21_3: ; RV32XTHEADBB-NEXT: neg a7, t1 @@ -2608,26 +2608,26 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { ; RV32XTHEADBB-NEXT: .LBB21_9: ; RV32XTHEADBB-NEXT: bltu t2, a5, .LBB21_12 ; RV32XTHEADBB-NEXT: # %bb.10: -; RV32XTHEADBB-NEXT: li t5, 0 +; RV32XTHEADBB-NEXT: li a1, 0 ; RV32XTHEADBB-NEXT: bgeu t1, a5, .LBB21_13 ; RV32XTHEADBB-NEXT: .LBB21_11: -; RV32XTHEADBB-NEXT: sll a1, a2, a4 ; RV32XTHEADBB-NEXT: srl t3, a2, t3 -; RV32XTHEADBB-NEXT: sll a4, a3, a4 -; RV32XTHEADBB-NEXT: or t3, t3, a4 -; RV32XTHEADBB-NEXT: mv a4, a3 +; RV32XTHEADBB-NEXT: sll t5, a3, a4 +; RV32XTHEADBB-NEXT: sll a4, a2, a4 +; RV32XTHEADBB-NEXT: or t5, t3, t5 +; RV32XTHEADBB-NEXT: mv t3, a3 ; RV32XTHEADBB-NEXT: bnez t1, .LBB21_14 ; RV32XTHEADBB-NEXT: j .LBB21_15 ; RV32XTHEADBB-NEXT: .LBB21_12: -; RV32XTHEADBB-NEXT: srl t5, a1, a7 +; RV32XTHEADBB-NEXT: srl a1, a1, a7 ; RV32XTHEADBB-NEXT: bltu t1, a5, .LBB21_11 ; RV32XTHEADBB-NEXT: .LBB21_13: -; RV32XTHEADBB-NEXT: li a1, 0 -; RV32XTHEADBB-NEXT: sll t3, a2, t1 -; RV32XTHEADBB-NEXT: mv a4, a3 +; RV32XTHEADBB-NEXT: li a4, 0 +; RV32XTHEADBB-NEXT: sll t5, a2, t1 +; RV32XTHEADBB-NEXT: mv t3, a3 ; RV32XTHEADBB-NEXT: beqz t1, .LBB21_15 ; RV32XTHEADBB-NEXT: .LBB21_14: -; RV32XTHEADBB-NEXT: mv a4, t3 +; RV32XTHEADBB-NEXT: mv t3, t5 ; RV32XTHEADBB-NEXT: .LBB21_15: ; RV32XTHEADBB-NEXT: bltu t2, a5, .LBB21_17 ; RV32XTHEADBB-NEXT: # %bb.16: @@ -2636,14 +2636,14 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { ; RV32XTHEADBB-NEXT: j .LBB21_19 ; RV32XTHEADBB-NEXT: .LBB21_17: ; RV32XTHEADBB-NEXT: srl t1, a2, a7 -; RV32XTHEADBB-NEXT: sll t3, a3, t4 -; RV32XTHEADBB-NEXT: or t1, t1, t3 +; RV32XTHEADBB-NEXT: sll t4, a3, t4 +; RV32XTHEADBB-NEXT: or t1, t1, t4 ; RV32XTHEADBB-NEXT: beqz t2, .LBB21_19 ; RV32XTHEADBB-NEXT: .LBB21_18: ; RV32XTHEADBB-NEXT: mv a2, t1 ; RV32XTHEADBB-NEXT: .LBB21_19: ; RV32XTHEADBB-NEXT: or a0, a6, a0 -; RV32XTHEADBB-NEXT: or a6, t0, t5 +; RV32XTHEADBB-NEXT: or a1, t0, a1 ; 
RV32XTHEADBB-NEXT: bltu t2, a5, .LBB21_21 ; RV32XTHEADBB-NEXT: # %bb.20: ; RV32XTHEADBB-NEXT: li a3, 0 @@ -2651,21 +2651,21 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { ; RV32XTHEADBB-NEXT: .LBB21_21: ; RV32XTHEADBB-NEXT: srl a3, a3, a7 ; RV32XTHEADBB-NEXT: .LBB21_22: -; RV32XTHEADBB-NEXT: or a1, a1, a2 -; RV32XTHEADBB-NEXT: or a3, a4, a3 -; RV32XTHEADBB-NEXT: add a0, a0, a1 -; RV32XTHEADBB-NEXT: sltu a1, a0, a1 -; RV32XTHEADBB-NEXT: add a3, a6, a3 -; RV32XTHEADBB-NEXT: add a1, a3, a1 +; RV32XTHEADBB-NEXT: or a2, a4, a2 +; RV32XTHEADBB-NEXT: or a3, t3, a3 +; RV32XTHEADBB-NEXT: add a0, a0, a2 +; RV32XTHEADBB-NEXT: sltu a2, a0, a2 +; RV32XTHEADBB-NEXT: add a1, a1, a3 +; RV32XTHEADBB-NEXT: add a1, a1, a2 ; RV32XTHEADBB-NEXT: ret ; ; RV64XTHEADBB-LABEL: rotl_64_mask_multiple: ; RV64XTHEADBB: # %bb.0: ; RV64XTHEADBB-NEXT: andi a3, a2, 63 -; RV64XTHEADBB-NEXT: sll a4, a0, a2 -; RV64XTHEADBB-NEXT: sll a2, a1, a2 ; RV64XTHEADBB-NEXT: neg a3, a3 +; RV64XTHEADBB-NEXT: sll a4, a0, a2 ; RV64XTHEADBB-NEXT: srl a0, a0, a3 +; RV64XTHEADBB-NEXT: sll a2, a1, a2 ; RV64XTHEADBB-NEXT: srl a1, a1, a3 ; RV64XTHEADBB-NEXT: or a0, a4, a0 ; RV64XTHEADBB-NEXT: or a1, a2, a1 @@ -2682,10 +2682,10 @@ define signext i32 @rotr_32_mask_multiple(i32 signext %a, i32 signext %b, i32 si ; RV32I-LABEL: rotr_32_mask_multiple: ; RV32I: # %bb.0: ; RV32I-NEXT: andi a3, a2, 31 -; RV32I-NEXT: srl a4, a0, a2 -; RV32I-NEXT: srl a2, a1, a2 ; RV32I-NEXT: neg a3, a3 +; RV32I-NEXT: srl a4, a0, a2 ; RV32I-NEXT: sll a0, a0, a3 +; RV32I-NEXT: srl a2, a1, a2 ; RV32I-NEXT: sll a1, a1, a3 ; RV32I-NEXT: or a0, a4, a0 ; RV32I-NEXT: or a1, a2, a1 @@ -2696,10 +2696,10 @@ define signext i32 @rotr_32_mask_multiple(i32 signext %a, i32 signext %b, i32 si ; RV64I: # %bb.0: ; RV64I-NEXT: andi a3, a2, 31 ; RV64I-NEXT: srlw a4, a0, a2 -; RV64I-NEXT: srlw a2, a1, a2 ; RV64I-NEXT: neg a5, a3 ; RV64I-NEXT: neg a3, a3 ; RV64I-NEXT: sllw a0, a0, a5 +; RV64I-NEXT: srlw a2, a1, a2 ; RV64I-NEXT: sllw a1, a1, 
a3 ; RV64I-NEXT: or a0, a4, a0 ; RV64I-NEXT: or a1, a2, a1 @@ -2723,10 +2723,10 @@ define signext i32 @rotr_32_mask_multiple(i32 signext %a, i32 signext %b, i32 si ; RV32XTHEADBB-LABEL: rotr_32_mask_multiple: ; RV32XTHEADBB: # %bb.0: ; RV32XTHEADBB-NEXT: andi a3, a2, 31 -; RV32XTHEADBB-NEXT: srl a4, a0, a2 -; RV32XTHEADBB-NEXT: srl a2, a1, a2 ; RV32XTHEADBB-NEXT: neg a3, a3 +; RV32XTHEADBB-NEXT: srl a4, a0, a2 ; RV32XTHEADBB-NEXT: sll a0, a0, a3 +; RV32XTHEADBB-NEXT: srl a2, a1, a2 ; RV32XTHEADBB-NEXT: sll a1, a1, a3 ; RV32XTHEADBB-NEXT: or a0, a4, a0 ; RV32XTHEADBB-NEXT: or a1, a2, a1 @@ -2737,10 +2737,10 @@ define signext i32 @rotr_32_mask_multiple(i32 signext %a, i32 signext %b, i32 si ; RV64XTHEADBB: # %bb.0: ; RV64XTHEADBB-NEXT: andi a3, a2, 31 ; RV64XTHEADBB-NEXT: srlw a4, a0, a2 -; RV64XTHEADBB-NEXT: srlw a2, a1, a2 ; RV64XTHEADBB-NEXT: neg a5, a3 ; RV64XTHEADBB-NEXT: neg a3, a3 ; RV64XTHEADBB-NEXT: sllw a0, a0, a5 +; RV64XTHEADBB-NEXT: srlw a2, a1, a2 ; RV64XTHEADBB-NEXT: sllw a1, a1, a3 ; RV64XTHEADBB-NEXT: or a0, a4, a0 ; RV64XTHEADBB-NEXT: or a1, a2, a1 @@ -2791,10 +2791,14 @@ define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { ; RV32I-NEXT: bnez t1, .LBB23_10 ; RV32I-NEXT: j .LBB23_11 ; RV32I-NEXT: .LBB23_9: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: srl t6, a0, t5 +; RV32I-NEXT: sll s0, a1, t2 ; RV32I-NEXT: sll t3, a0, t2 -; RV32I-NEXT: srl a0, a0, t5 -; RV32I-NEXT: sll t6, a1, t2 -; RV32I-NEXT: or a0, a0, t6 +; RV32I-NEXT: or a0, t6, s0 +; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: beqz t1, .LBB23_11 ; RV32I-NEXT: .LBB23_10: ; RV32I-NEXT: mv a1, a0 @@ -2819,10 +2823,10 @@ define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: bgeu t1, a6, .LBB23_19 ; RV32I-NEXT: .LBB23_17: +; RV32I-NEXT: srl t0, a2, t5 +; RV32I-NEXT: sll t4, a3, t2 ; RV32I-NEXT: sll a6, a2, t2 -; RV32I-NEXT: 
srl a2, a2, t5 -; RV32I-NEXT: sll t0, a3, t2 -; RV32I-NEXT: or a2, a2, t0 +; RV32I-NEXT: or a2, t0, t4 ; RV32I-NEXT: j .LBB23_20 ; RV32I-NEXT: .LBB23_18: ; RV32I-NEXT: srl a4, a3, a4 @@ -2848,10 +2852,10 @@ define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { ; RV64I-LABEL: rotr_64_mask_multiple: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a3, a2, 63 -; RV64I-NEXT: srl a4, a0, a2 -; RV64I-NEXT: srl a2, a1, a2 ; RV64I-NEXT: neg a3, a3 +; RV64I-NEXT: srl a4, a0, a2 ; RV64I-NEXT: sll a0, a0, a3 +; RV64I-NEXT: srl a2, a1, a2 ; RV64I-NEXT: sll a1, a1, a3 ; RV64I-NEXT: or a0, a4, a0 ; RV64I-NEXT: or a1, a2, a1 @@ -2895,10 +2899,14 @@ define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { ; RV32ZBB-NEXT: bnez t1, .LBB23_10 ; RV32ZBB-NEXT: j .LBB23_11 ; RV32ZBB-NEXT: .LBB23_9: +; RV32ZBB-NEXT: addi sp, sp, -16 +; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32ZBB-NEXT: srl t6, a0, t5 +; RV32ZBB-NEXT: sll s0, a1, t2 ; RV32ZBB-NEXT: sll t3, a0, t2 -; RV32ZBB-NEXT: srl a0, a0, t5 -; RV32ZBB-NEXT: sll t6, a1, t2 -; RV32ZBB-NEXT: or a0, a0, t6 +; RV32ZBB-NEXT: or a0, t6, s0 +; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; RV32ZBB-NEXT: addi sp, sp, 16 ; RV32ZBB-NEXT: beqz t1, .LBB23_11 ; RV32ZBB-NEXT: .LBB23_10: ; RV32ZBB-NEXT: mv a1, a0 @@ -2923,10 +2931,10 @@ define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { ; RV32ZBB-NEXT: li a4, 0 ; RV32ZBB-NEXT: bgeu t1, a6, .LBB23_19 ; RV32ZBB-NEXT: .LBB23_17: +; RV32ZBB-NEXT: srl t0, a2, t5 +; RV32ZBB-NEXT: sll t4, a3, t2 ; RV32ZBB-NEXT: sll a6, a2, t2 -; RV32ZBB-NEXT: srl a2, a2, t5 -; RV32ZBB-NEXT: sll t0, a3, t2 -; RV32ZBB-NEXT: or a2, a2, t0 +; RV32ZBB-NEXT: or a2, t0, t4 ; RV32ZBB-NEXT: j .LBB23_20 ; RV32ZBB-NEXT: .LBB23_18: ; RV32ZBB-NEXT: srl a4, a3, a4 @@ -2993,10 +3001,14 @@ define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { ; RV32XTHEADBB-NEXT: bnez t1, .LBB23_10 ; RV32XTHEADBB-NEXT: j .LBB23_11 ; RV32XTHEADBB-NEXT: .LBB23_9: +; 
RV32XTHEADBB-NEXT: addi sp, sp, -16 +; RV32XTHEADBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32XTHEADBB-NEXT: srl t6, a0, t5 +; RV32XTHEADBB-NEXT: sll s0, a1, t2 ; RV32XTHEADBB-NEXT: sll t3, a0, t2 -; RV32XTHEADBB-NEXT: srl a0, a0, t5 -; RV32XTHEADBB-NEXT: sll t6, a1, t2 -; RV32XTHEADBB-NEXT: or a0, a0, t6 +; RV32XTHEADBB-NEXT: or a0, t6, s0 +; RV32XTHEADBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; RV32XTHEADBB-NEXT: addi sp, sp, 16 ; RV32XTHEADBB-NEXT: beqz t1, .LBB23_11 ; RV32XTHEADBB-NEXT: .LBB23_10: ; RV32XTHEADBB-NEXT: mv a1, a0 @@ -3021,10 +3033,10 @@ define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { ; RV32XTHEADBB-NEXT: li a4, 0 ; RV32XTHEADBB-NEXT: bgeu t1, a6, .LBB23_19 ; RV32XTHEADBB-NEXT: .LBB23_17: +; RV32XTHEADBB-NEXT: srl t0, a2, t5 +; RV32XTHEADBB-NEXT: sll t4, a3, t2 ; RV32XTHEADBB-NEXT: sll a6, a2, t2 -; RV32XTHEADBB-NEXT: srl a2, a2, t5 -; RV32XTHEADBB-NEXT: sll t0, a3, t2 -; RV32XTHEADBB-NEXT: or a2, a2, t0 +; RV32XTHEADBB-NEXT: or a2, t0, t4 ; RV32XTHEADBB-NEXT: j .LBB23_20 ; RV32XTHEADBB-NEXT: .LBB23_18: ; RV32XTHEADBB-NEXT: srl a4, a3, a4 @@ -3050,10 +3062,10 @@ define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { ; RV64XTHEADBB-LABEL: rotr_64_mask_multiple: ; RV64XTHEADBB: # %bb.0: ; RV64XTHEADBB-NEXT: andi a3, a2, 63 -; RV64XTHEADBB-NEXT: srl a4, a0, a2 -; RV64XTHEADBB-NEXT: srl a2, a1, a2 ; RV64XTHEADBB-NEXT: neg a3, a3 +; RV64XTHEADBB-NEXT: srl a4, a0, a2 ; RV64XTHEADBB-NEXT: sll a0, a0, a3 +; RV64XTHEADBB-NEXT: srl a2, a1, a2 ; RV64XTHEADBB-NEXT: sll a1, a1, a3 ; RV64XTHEADBB-NEXT: or a0, a4, a0 ; RV64XTHEADBB-NEXT: or a1, a2, a1 @@ -3069,18 +3081,18 @@ define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { define i64 @rotl_64_zext(i64 %x, i32 %y) nounwind { ; RV32I-LABEL: rotl_64_zext: ; RV32I: # %bb.0: +; RV32I-NEXT: li a3, 32 +; RV32I-NEXT: neg a4, a2 ; RV32I-NEXT: li a6, 64 -; RV32I-NEXT: li a4, 32 -; RV32I-NEXT: neg a5, a2 -; RV32I-NEXT: srl a7, a0, a5 -; RV32I-NEXT: 
bltu a2, a4, .LBB24_2 +; RV32I-NEXT: srl a7, a0, a4 +; RV32I-NEXT: bltu a2, a3, .LBB24_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: sll t1, a0, a2 ; RV32I-NEXT: j .LBB24_3 ; RV32I-NEXT: .LBB24_2: -; RV32I-NEXT: sll a3, a0, a2 ; RV32I-NEXT: sll t0, a1, a2 +; RV32I-NEXT: sll a5, a0, a2 ; RV32I-NEXT: or t1, a7, t0 ; RV32I-NEXT: .LBB24_3: ; RV32I-NEXT: sub t0, a6, a2 @@ -3089,7 +3101,7 @@ define i64 @rotl_64_zext(i64 %x, i32 %y) nounwind { ; RV32I-NEXT: # %bb.4: ; RV32I-NEXT: mv a6, t1 ; RV32I-NEXT: .LBB24_5: -; RV32I-NEXT: bltu t0, a4, .LBB24_7 +; RV32I-NEXT: bltu t0, a3, .LBB24_7 ; RV32I-NEXT: # %bb.6: ; RV32I-NEXT: srl a2, a1, t0 ; RV32I-NEXT: bnez t0, .LBB24_8 @@ -3102,14 +3114,14 @@ define i64 @rotl_64_zext(i64 %x, i32 %y) nounwind { ; RV32I-NEXT: .LBB24_8: ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: .LBB24_9: -; RV32I-NEXT: bltu t0, a4, .LBB24_11 +; RV32I-NEXT: bltu t0, a3, .LBB24_11 ; RV32I-NEXT: # %bb.10: ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: j .LBB24_12 ; RV32I-NEXT: .LBB24_11: -; RV32I-NEXT: srl a1, a1, a5 +; RV32I-NEXT: srl a1, a1, a4 ; RV32I-NEXT: .LBB24_12: -; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: or a1, a6, a1 ; RV32I-NEXT: ret ; @@ -3124,18 +3136,18 @@ define i64 @rotl_64_zext(i64 %x, i32 %y) nounwind { ; ; RV32ZBB-LABEL: rotl_64_zext: ; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: li a3, 32 +; RV32ZBB-NEXT: neg a4, a2 ; RV32ZBB-NEXT: li a6, 64 -; RV32ZBB-NEXT: li a4, 32 -; RV32ZBB-NEXT: neg a5, a2 -; RV32ZBB-NEXT: srl a7, a0, a5 -; RV32ZBB-NEXT: bltu a2, a4, .LBB24_2 +; RV32ZBB-NEXT: srl a7, a0, a4 +; RV32ZBB-NEXT: bltu a2, a3, .LBB24_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: li a3, 0 +; RV32ZBB-NEXT: li a5, 0 ; RV32ZBB-NEXT: sll t1, a0, a2 ; RV32ZBB-NEXT: j .LBB24_3 ; RV32ZBB-NEXT: .LBB24_2: -; RV32ZBB-NEXT: sll a3, a0, a2 ; RV32ZBB-NEXT: sll t0, a1, a2 +; RV32ZBB-NEXT: sll a5, a0, a2 ; RV32ZBB-NEXT: or t1, a7, t0 ; RV32ZBB-NEXT: .LBB24_3: ; RV32ZBB-NEXT: sub t0, a6, a2 @@ -3144,7 +3156,7 @@ 
define i64 @rotl_64_zext(i64 %x, i32 %y) nounwind { ; RV32ZBB-NEXT: # %bb.4: ; RV32ZBB-NEXT: mv a6, t1 ; RV32ZBB-NEXT: .LBB24_5: -; RV32ZBB-NEXT: bltu t0, a4, .LBB24_7 +; RV32ZBB-NEXT: bltu t0, a3, .LBB24_7 ; RV32ZBB-NEXT: # %bb.6: ; RV32ZBB-NEXT: srl a2, a1, t0 ; RV32ZBB-NEXT: bnez t0, .LBB24_8 @@ -3157,14 +3169,14 @@ define i64 @rotl_64_zext(i64 %x, i32 %y) nounwind { ; RV32ZBB-NEXT: .LBB24_8: ; RV32ZBB-NEXT: mv a0, a2 ; RV32ZBB-NEXT: .LBB24_9: -; RV32ZBB-NEXT: bltu t0, a4, .LBB24_11 +; RV32ZBB-NEXT: bltu t0, a3, .LBB24_11 ; RV32ZBB-NEXT: # %bb.10: ; RV32ZBB-NEXT: li a1, 0 ; RV32ZBB-NEXT: j .LBB24_12 ; RV32ZBB-NEXT: .LBB24_11: -; RV32ZBB-NEXT: srl a1, a1, a5 +; RV32ZBB-NEXT: srl a1, a1, a4 ; RV32ZBB-NEXT: .LBB24_12: -; RV32ZBB-NEXT: or a0, a3, a0 +; RV32ZBB-NEXT: or a0, a5, a0 ; RV32ZBB-NEXT: or a1, a6, a1 ; RV32ZBB-NEXT: ret ; @@ -3179,18 +3191,18 @@ define i64 @rotl_64_zext(i64 %x, i32 %y) nounwind { ; ; RV32XTHEADBB-LABEL: rotl_64_zext: ; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: li a3, 32 +; RV32XTHEADBB-NEXT: neg a4, a2 ; RV32XTHEADBB-NEXT: li a6, 64 -; RV32XTHEADBB-NEXT: li a4, 32 -; RV32XTHEADBB-NEXT: neg a5, a2 -; RV32XTHEADBB-NEXT: srl a7, a0, a5 -; RV32XTHEADBB-NEXT: bltu a2, a4, .LBB24_2 +; RV32XTHEADBB-NEXT: srl a7, a0, a4 +; RV32XTHEADBB-NEXT: bltu a2, a3, .LBB24_2 ; RV32XTHEADBB-NEXT: # %bb.1: -; RV32XTHEADBB-NEXT: li a3, 0 +; RV32XTHEADBB-NEXT: li a5, 0 ; RV32XTHEADBB-NEXT: sll t1, a0, a2 ; RV32XTHEADBB-NEXT: j .LBB24_3 ; RV32XTHEADBB-NEXT: .LBB24_2: -; RV32XTHEADBB-NEXT: sll a3, a0, a2 ; RV32XTHEADBB-NEXT: sll t0, a1, a2 +; RV32XTHEADBB-NEXT: sll a5, a0, a2 ; RV32XTHEADBB-NEXT: or t1, a7, t0 ; RV32XTHEADBB-NEXT: .LBB24_3: ; RV32XTHEADBB-NEXT: sub t0, a6, a2 @@ -3199,7 +3211,7 @@ define i64 @rotl_64_zext(i64 %x, i32 %y) nounwind { ; RV32XTHEADBB-NEXT: # %bb.4: ; RV32XTHEADBB-NEXT: mv a6, t1 ; RV32XTHEADBB-NEXT: .LBB24_5: -; RV32XTHEADBB-NEXT: bltu t0, a4, .LBB24_7 +; RV32XTHEADBB-NEXT: bltu t0, a3, .LBB24_7 ; RV32XTHEADBB-NEXT: # %bb.6: ; 
RV32XTHEADBB-NEXT: srl a2, a1, t0 ; RV32XTHEADBB-NEXT: bnez t0, .LBB24_8 @@ -3212,14 +3224,14 @@ define i64 @rotl_64_zext(i64 %x, i32 %y) nounwind { ; RV32XTHEADBB-NEXT: .LBB24_8: ; RV32XTHEADBB-NEXT: mv a0, a2 ; RV32XTHEADBB-NEXT: .LBB24_9: -; RV32XTHEADBB-NEXT: bltu t0, a4, .LBB24_11 +; RV32XTHEADBB-NEXT: bltu t0, a3, .LBB24_11 ; RV32XTHEADBB-NEXT: # %bb.10: ; RV32XTHEADBB-NEXT: li a1, 0 ; RV32XTHEADBB-NEXT: j .LBB24_12 ; RV32XTHEADBB-NEXT: .LBB24_11: -; RV32XTHEADBB-NEXT: srl a1, a1, a5 +; RV32XTHEADBB-NEXT: srl a1, a1, a4 ; RV32XTHEADBB-NEXT: .LBB24_12: -; RV32XTHEADBB-NEXT: or a0, a3, a0 +; RV32XTHEADBB-NEXT: or a0, a5, a0 ; RV32XTHEADBB-NEXT: or a1, a6, a1 ; RV32XTHEADBB-NEXT: ret ; @@ -3243,16 +3255,16 @@ define i64 @rotl_64_zext(i64 %x, i32 %y) nounwind { define i64 @rotr_64_zext(i64 %x, i32 %y) nounwind { ; RV32I-LABEL: rotr_64_zext: ; RV32I: # %bb.0: -; RV32I-NEXT: li a5, 32 -; RV32I-NEXT: neg a6, a2 -; RV32I-NEXT: sll a4, a1, a6 -; RV32I-NEXT: bltu a2, a5, .LBB25_2 +; RV32I-NEXT: li a6, 32 +; RV32I-NEXT: neg a4, a2 +; RV32I-NEXT: sll a5, a1, a4 +; RV32I-NEXT: bltu a2, a6, .LBB25_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: srl a7, a1, a2 ; RV32I-NEXT: j .LBB25_3 ; RV32I-NEXT: .LBB25_2: ; RV32I-NEXT: srl a3, a0, a2 -; RV32I-NEXT: or a7, a3, a4 +; RV32I-NEXT: or a7, a3, a5 ; RV32I-NEXT: .LBB25_3: ; RV32I-NEXT: li t0, 64 ; RV32I-NEXT: mv a3, a0 @@ -3261,28 +3273,28 @@ define i64 @rotr_64_zext(i64 %x, i32 %y) nounwind { ; RV32I-NEXT: mv a3, a7 ; RV32I-NEXT: .LBB25_5: ; RV32I-NEXT: sub a7, t0, a2 -; RV32I-NEXT: bltu a2, a5, .LBB25_8 +; RV32I-NEXT: bltu a2, a6, .LBB25_8 ; RV32I-NEXT: # %bb.6: ; RV32I-NEXT: li a2, 0 -; RV32I-NEXT: bgeu a7, a5, .LBB25_9 +; RV32I-NEXT: bgeu a7, a6, .LBB25_9 ; RV32I-NEXT: .LBB25_7: -; RV32I-NEXT: sll a5, a0, a6 ; RV32I-NEXT: neg a6, a7 -; RV32I-NEXT: srl a0, a0, a6 -; RV32I-NEXT: or a0, a0, a4 +; RV32I-NEXT: srl a6, a0, a6 +; RV32I-NEXT: sll a4, a0, a4 +; RV32I-NEXT: or a0, a6, a5 ; RV32I-NEXT: bnez a7, .LBB25_10 ; RV32I-NEXT: j 
.LBB25_11 ; RV32I-NEXT: .LBB25_8: ; RV32I-NEXT: srl a2, a1, a2 -; RV32I-NEXT: bltu a7, a5, .LBB25_7 +; RV32I-NEXT: bltu a7, a6, .LBB25_7 ; RV32I-NEXT: .LBB25_9: -; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: sll a0, a0, a7 ; RV32I-NEXT: beqz a7, .LBB25_11 ; RV32I-NEXT: .LBB25_10: ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: .LBB25_11: -; RV32I-NEXT: or a0, a3, a5 +; RV32I-NEXT: or a0, a3, a4 ; RV32I-NEXT: or a1, a2, a1 ; RV32I-NEXT: ret ; @@ -3297,16 +3309,16 @@ define i64 @rotr_64_zext(i64 %x, i32 %y) nounwind { ; ; RV32ZBB-LABEL: rotr_64_zext: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: li a5, 32 -; RV32ZBB-NEXT: neg a6, a2 -; RV32ZBB-NEXT: sll a4, a1, a6 -; RV32ZBB-NEXT: bltu a2, a5, .LBB25_2 +; RV32ZBB-NEXT: li a6, 32 +; RV32ZBB-NEXT: neg a4, a2 +; RV32ZBB-NEXT: sll a5, a1, a4 +; RV32ZBB-NEXT: bltu a2, a6, .LBB25_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: srl a7, a1, a2 ; RV32ZBB-NEXT: j .LBB25_3 ; RV32ZBB-NEXT: .LBB25_2: ; RV32ZBB-NEXT: srl a3, a0, a2 -; RV32ZBB-NEXT: or a7, a3, a4 +; RV32ZBB-NEXT: or a7, a3, a5 ; RV32ZBB-NEXT: .LBB25_3: ; RV32ZBB-NEXT: li t0, 64 ; RV32ZBB-NEXT: mv a3, a0 @@ -3315,28 +3327,28 @@ define i64 @rotr_64_zext(i64 %x, i32 %y) nounwind { ; RV32ZBB-NEXT: mv a3, a7 ; RV32ZBB-NEXT: .LBB25_5: ; RV32ZBB-NEXT: sub a7, t0, a2 -; RV32ZBB-NEXT: bltu a2, a5, .LBB25_8 +; RV32ZBB-NEXT: bltu a2, a6, .LBB25_8 ; RV32ZBB-NEXT: # %bb.6: ; RV32ZBB-NEXT: li a2, 0 -; RV32ZBB-NEXT: bgeu a7, a5, .LBB25_9 +; RV32ZBB-NEXT: bgeu a7, a6, .LBB25_9 ; RV32ZBB-NEXT: .LBB25_7: -; RV32ZBB-NEXT: sll a5, a0, a6 ; RV32ZBB-NEXT: neg a6, a7 -; RV32ZBB-NEXT: srl a0, a0, a6 -; RV32ZBB-NEXT: or a0, a0, a4 +; RV32ZBB-NEXT: srl a6, a0, a6 +; RV32ZBB-NEXT: sll a4, a0, a4 +; RV32ZBB-NEXT: or a0, a6, a5 ; RV32ZBB-NEXT: bnez a7, .LBB25_10 ; RV32ZBB-NEXT: j .LBB25_11 ; RV32ZBB-NEXT: .LBB25_8: ; RV32ZBB-NEXT: srl a2, a1, a2 -; RV32ZBB-NEXT: bltu a7, a5, .LBB25_7 +; RV32ZBB-NEXT: bltu a7, a6, .LBB25_7 ; RV32ZBB-NEXT: .LBB25_9: -; RV32ZBB-NEXT: li a5, 0 +; RV32ZBB-NEXT: li a4, 0 ; 
RV32ZBB-NEXT: sll a0, a0, a7 ; RV32ZBB-NEXT: beqz a7, .LBB25_11 ; RV32ZBB-NEXT: .LBB25_10: ; RV32ZBB-NEXT: mv a1, a0 ; RV32ZBB-NEXT: .LBB25_11: -; RV32ZBB-NEXT: or a0, a3, a5 +; RV32ZBB-NEXT: or a0, a3, a4 ; RV32ZBB-NEXT: or a1, a2, a1 ; RV32ZBB-NEXT: ret ; @@ -3351,16 +3363,16 @@ define i64 @rotr_64_zext(i64 %x, i32 %y) nounwind { ; ; RV32XTHEADBB-LABEL: rotr_64_zext: ; RV32XTHEADBB: # %bb.0: -; RV32XTHEADBB-NEXT: li a5, 32 -; RV32XTHEADBB-NEXT: neg a6, a2 -; RV32XTHEADBB-NEXT: sll a4, a1, a6 -; RV32XTHEADBB-NEXT: bltu a2, a5, .LBB25_2 +; RV32XTHEADBB-NEXT: li a6, 32 +; RV32XTHEADBB-NEXT: neg a4, a2 +; RV32XTHEADBB-NEXT: sll a5, a1, a4 +; RV32XTHEADBB-NEXT: bltu a2, a6, .LBB25_2 ; RV32XTHEADBB-NEXT: # %bb.1: ; RV32XTHEADBB-NEXT: srl a7, a1, a2 ; RV32XTHEADBB-NEXT: j .LBB25_3 ; RV32XTHEADBB-NEXT: .LBB25_2: ; RV32XTHEADBB-NEXT: srl a3, a0, a2 -; RV32XTHEADBB-NEXT: or a7, a3, a4 +; RV32XTHEADBB-NEXT: or a7, a3, a5 ; RV32XTHEADBB-NEXT: .LBB25_3: ; RV32XTHEADBB-NEXT: li t0, 64 ; RV32XTHEADBB-NEXT: mv a3, a0 @@ -3369,28 +3381,28 @@ define i64 @rotr_64_zext(i64 %x, i32 %y) nounwind { ; RV32XTHEADBB-NEXT: mv a3, a7 ; RV32XTHEADBB-NEXT: .LBB25_5: ; RV32XTHEADBB-NEXT: sub a7, t0, a2 -; RV32XTHEADBB-NEXT: bltu a2, a5, .LBB25_8 +; RV32XTHEADBB-NEXT: bltu a2, a6, .LBB25_8 ; RV32XTHEADBB-NEXT: # %bb.6: ; RV32XTHEADBB-NEXT: li a2, 0 -; RV32XTHEADBB-NEXT: bgeu a7, a5, .LBB25_9 +; RV32XTHEADBB-NEXT: bgeu a7, a6, .LBB25_9 ; RV32XTHEADBB-NEXT: .LBB25_7: -; RV32XTHEADBB-NEXT: sll a5, a0, a6 ; RV32XTHEADBB-NEXT: neg a6, a7 -; RV32XTHEADBB-NEXT: srl a0, a0, a6 -; RV32XTHEADBB-NEXT: or a0, a0, a4 +; RV32XTHEADBB-NEXT: srl a6, a0, a6 +; RV32XTHEADBB-NEXT: sll a4, a0, a4 +; RV32XTHEADBB-NEXT: or a0, a6, a5 ; RV32XTHEADBB-NEXT: bnez a7, .LBB25_10 ; RV32XTHEADBB-NEXT: j .LBB25_11 ; RV32XTHEADBB-NEXT: .LBB25_8: ; RV32XTHEADBB-NEXT: srl a2, a1, a2 -; RV32XTHEADBB-NEXT: bltu a7, a5, .LBB25_7 +; RV32XTHEADBB-NEXT: bltu a7, a6, .LBB25_7 ; RV32XTHEADBB-NEXT: .LBB25_9: -; RV32XTHEADBB-NEXT: li a5, 
0 +; RV32XTHEADBB-NEXT: li a4, 0 ; RV32XTHEADBB-NEXT: sll a0, a0, a7 ; RV32XTHEADBB-NEXT: beqz a7, .LBB25_11 ; RV32XTHEADBB-NEXT: .LBB25_10: ; RV32XTHEADBB-NEXT: mv a1, a0 ; RV32XTHEADBB-NEXT: .LBB25_11: -; RV32XTHEADBB-NEXT: or a0, a3, a5 +; RV32XTHEADBB-NEXT: or a0, a3, a4 ; RV32XTHEADBB-NEXT: or a1, a2, a1 ; RV32XTHEADBB-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll index da95481a5e588..7e9681595fb4b 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll @@ -139,17 +139,17 @@ define i64 @rol_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: rol_i64: ; CHECK: # %bb.0: ; CHECK-NEXT: andi a6, a2, 63 -; CHECK-NEXT: li a4, 32 -; CHECK-NEXT: bltu a6, a4, .LBB7_2 +; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: bltu a6, a3, .LBB7_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: li a4, 0 ; CHECK-NEXT: sll a7, a0, a6 ; CHECK-NEXT: j .LBB7_3 ; CHECK-NEXT: .LBB7_2: -; CHECK-NEXT: sll a3, a0, a2 -; CHECK-NEXT: neg a5, a6 -; CHECK-NEXT: srl a5, a0, a5 +; CHECK-NEXT: neg a4, a6 +; CHECK-NEXT: srl a5, a0, a4 ; CHECK-NEXT: sll a7, a1, a2 +; CHECK-NEXT: sll a4, a0, a2 ; CHECK-NEXT: or a7, a5, a7 ; CHECK-NEXT: .LBB7_3: ; CHECK-NEXT: neg a5, a2 @@ -159,28 +159,28 @@ define i64 @rol_i64(i64 %a, i64 %b) nounwind { ; CHECK-NEXT: mv a2, a7 ; CHECK-NEXT: .LBB7_5: ; CHECK-NEXT: andi a6, a5, 63 -; CHECK-NEXT: bltu a6, a4, .LBB7_7 +; CHECK-NEXT: bltu a6, a3, .LBB7_7 ; CHECK-NEXT: # %bb.6: ; CHECK-NEXT: srl a7, a1, a6 ; CHECK-NEXT: bnez a6, .LBB7_8 ; CHECK-NEXT: j .LBB7_9 ; CHECK-NEXT: .LBB7_7: -; CHECK-NEXT: srl a7, a0, a5 -; CHECK-NEXT: neg t0, a6 -; CHECK-NEXT: sll t0, a1, t0 -; CHECK-NEXT: or a7, a7, t0 +; CHECK-NEXT: neg a7, a6 +; CHECK-NEXT: srl t0, a0, a5 +; CHECK-NEXT: sll a7, a1, a7 +; CHECK-NEXT: or a7, t0, a7 ; CHECK-NEXT: beqz a6, .LBB7_9 ; CHECK-NEXT: .LBB7_8: ; CHECK-NEXT: mv a0, a7 ; CHECK-NEXT: .LBB7_9: -; CHECK-NEXT: bltu 
a6, a4, .LBB7_11 +; CHECK-NEXT: bltu a6, a3, .LBB7_11 ; CHECK-NEXT: # %bb.10: ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: j .LBB7_12 ; CHECK-NEXT: .LBB7_11: ; CHECK-NEXT: srl a1, a1, a5 ; CHECK-NEXT: .LBB7_12: -; CHECK-NEXT: or a0, a3, a0 +; CHECK-NEXT: or a0, a4, a0 ; CHECK-NEXT: or a1, a2, a1 ; CHECK-NEXT: ret %or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %b) @@ -223,10 +223,10 @@ define i64 @ror_i64(i64 %a, i64 %b) nounwind { ; CHECK-NEXT: bnez a5, .LBB9_3 ; CHECK-NEXT: j .LBB9_4 ; CHECK-NEXT: .LBB9_2: -; CHECK-NEXT: srl a3, a0, a2 -; CHECK-NEXT: neg a6, a5 -; CHECK-NEXT: sll a6, a1, a6 -; CHECK-NEXT: or a6, a3, a6 +; CHECK-NEXT: neg a3, a5 +; CHECK-NEXT: srl a6, a0, a2 +; CHECK-NEXT: sll a3, a1, a3 +; CHECK-NEXT: or a6, a6, a3 ; CHECK-NEXT: mv a3, a0 ; CHECK-NEXT: beqz a5, .LBB9_4 ; CHECK-NEXT: .LBB9_3: @@ -239,11 +239,11 @@ define i64 @ror_i64(i64 %a, i64 %b) nounwind { ; CHECK-NEXT: andi a5, a6, 63 ; CHECK-NEXT: bgeu a5, a4, .LBB9_8 ; CHECK-NEXT: .LBB9_6: +; CHECK-NEXT: neg a4, a5 +; CHECK-NEXT: srl a7, a0, a4 +; CHECK-NEXT: sll t0, a1, a6 ; CHECK-NEXT: sll a4, a0, a6 -; CHECK-NEXT: neg a7, a5 -; CHECK-NEXT: srl a0, a0, a7 -; CHECK-NEXT: sll a6, a1, a6 -; CHECK-NEXT: or a0, a0, a6 +; CHECK-NEXT: or a0, a7, t0 ; CHECK-NEXT: bnez a5, .LBB9_9 ; CHECK-NEXT: j .LBB9_10 ; CHECK-NEXT: .LBB9_7: diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll index 0b376dd779887..67d49057b2e3f 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll @@ -12,31 +12,31 @@ define i32 @ctlz_i32(i32 %a) nounwind { ; RV32I-NEXT: beqz a0, .LBB0_2 ; RV32I-NEXT: # %bb.1: # %cond.false ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: lui a2, 349525 ; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: addi a1, a2, 1365 -; RV32I-NEXT: srli a2, a0, 2 -; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: srli a2, a0, 4 -; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: srli a2, a0, 8 -; RV32I-NEXT: or a0, a0, a2 -; 
RV32I-NEXT: srli a2, a0, 16 -; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: srli a1, a0, 2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 8 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: lui a1, 349525 ; RV32I-NEXT: srli a2, a0, 1 +; RV32I-NEXT: addi a1, a1, 1365 ; RV32I-NEXT: and a1, a2, a1 -; RV32I-NEXT: lui a2, 209715 -; RV32I-NEXT: addi a2, a2, 819 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: srli a1, a0, 2 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: lui a2, 61681 -; RV32I-NEXT: addi a2, a2, -241 -; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: srli a2, a0, 2 +; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: and a2, a2, a1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: add a0, a2, a0 ; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: lui a2, 61681 ; RV32I-NEXT: add a0, a1, a0 -; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: addi a1, a2, -241 +; RV32I-NEXT: and a0, a0, a1 ; RV32I-NEXT: slli a1, a0, 8 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: slli a1, a0, 16 @@ -67,10 +67,10 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV32I-NEXT: # %bb.1: # %cond.false ; RV32I-NEXT: lui a2, 349525 ; RV32I-NEXT: lui a3, 209715 -; RV32I-NEXT: lui a6, 61681 ; RV32I-NEXT: addi a5, a2, 1365 ; RV32I-NEXT: addi a4, a3, 819 -; RV32I-NEXT: addi a3, a6, -241 +; RV32I-NEXT: lui a3, 61681 +; RV32I-NEXT: addi a3, a3, -241 ; RV32I-NEXT: li a2, 32 ; RV32I-NEXT: beqz a1, .LBB1_4 ; RV32I-NEXT: # %bb.2: # %cond.false @@ -162,20 +162,20 @@ define i32 @cttz_i32(i32 %a) nounwind { ; RV32I-NEXT: # %bb.1: # %cond.false ; RV32I-NEXT: not a1, a0 ; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: lui a2, 349525 ; RV32I-NEXT: and a0, a1, a0 -; RV32I-NEXT: addi a1, a2, 1365 +; RV32I-NEXT: lui a1, 349525 ; RV32I-NEXT: srli a2, a0, 1 +; RV32I-NEXT: addi a1, a1, 1365 ; RV32I-NEXT: and a1, a2, a1 -; RV32I-NEXT: lui a2, 209715 -; RV32I-NEXT: addi a2, a2, 
819 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: srli a1, a0, 2 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: lui a2, 61681 -; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: srli a2, a0, 2 +; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: and a2, a2, a1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: add a0, a2, a0 ; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: lui a2, 61681 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: addi a1, a2, -241 ; RV32I-NEXT: and a0, a0, a1 @@ -281,19 +281,19 @@ declare i32 @llvm.ctpop.i32(i32) define i32 @ctpop_i32(i32 %a) nounwind { ; RV32I-LABEL: ctpop_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi a2, a2, 1365 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: lui a2, 209715 -; RV32I-NEXT: addi a2, a2, 819 +; RV32I-NEXT: lui a1, 349525 +; RV32I-NEXT: srli a2, a0, 1 +; RV32I-NEXT: addi a1, a1, 1365 +; RV32I-NEXT: and a1, a2, a1 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: srli a1, a0, 2 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: lui a2, 61681 -; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: srli a2, a0, 2 +; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: and a2, a2, a1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: add a0, a2, a0 ; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: lui a2, 61681 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: addi a1, a2, -241 ; RV32I-NEXT: and a0, a0, a1 @@ -320,29 +320,29 @@ define i64 @ctpop_i64(i64 %a) nounwind { ; RV32I-NEXT: srli a2, a0, 1 ; RV32I-NEXT: lui a3, 349525 ; RV32I-NEXT: lui a4, 209715 -; RV32I-NEXT: srli a5, a1, 1 ; RV32I-NEXT: addi a3, a3, 1365 +; RV32I-NEXT: srli a5, a1, 1 ; RV32I-NEXT: and a2, a2, a3 -; RV32I-NEXT: and a3, a5, a3 -; RV32I-NEXT: lui a5, 61681 ; RV32I-NEXT: addi a4, a4, 819 -; RV32I-NEXT: addi a5, a5, -241 ; RV32I-NEXT: sub a0, a0, a2 -; RV32I-NEXT: sub a1, a1, a3 +; RV32I-NEXT: and a3, a5, a3 ; RV32I-NEXT: srli a2, a0, 2 ; RV32I-NEXT: and 
a0, a0, a4 -; RV32I-NEXT: srli a3, a1, 2 -; RV32I-NEXT: and a1, a1, a4 ; RV32I-NEXT: and a2, a2, a4 -; RV32I-NEXT: and a3, a3, a4 +; RV32I-NEXT: sub a1, a1, a3 ; RV32I-NEXT: add a0, a2, a0 -; RV32I-NEXT: add a1, a3, a1 -; RV32I-NEXT: srli a2, a0, 4 +; RV32I-NEXT: srli a2, a1, 2 +; RV32I-NEXT: srli a3, a0, 4 +; RV32I-NEXT: and a2, a2, a4 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: add a0, a3, a0 +; RV32I-NEXT: add a1, a2, a1 +; RV32I-NEXT: lui a2, 61681 ; RV32I-NEXT: srli a3, a1, 4 -; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: addi a2, a2, -241 ; RV32I-NEXT: add a1, a3, a1 -; RV32I-NEXT: and a0, a0, a5 -; RV32I-NEXT: and a1, a1, a5 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: and a1, a1, a2 ; RV32I-NEXT: slli a2, a0, 8 ; RV32I-NEXT: slli a3, a1, 8 ; RV32I-NEXT: add a0, a0, a2 @@ -372,9 +372,9 @@ define i1 @ctpop_i64_ugt_two(i64 %a) nounwind { ; RV32I-LABEL: ctpop_i64_ugt_two: ; RV32I: # %bb.0: ; RV32I-NEXT: addi a2, a0, -1 -; RV32I-NEXT: addi a3, a1, -1 -; RV32I-NEXT: sltiu a4, a2, -1 -; RV32I-NEXT: add a3, a3, a4 +; RV32I-NEXT: sltiu a3, a2, -1 +; RV32I-NEXT: addi a4, a1, -1 +; RV32I-NEXT: add a3, a4, a3 ; RV32I-NEXT: and a0, a0, a2 ; RV32I-NEXT: and a1, a1, a3 ; RV32I-NEXT: or a0, a0, a1 @@ -402,9 +402,9 @@ define i1 @ctpop_i64_ugt_one(i64 %a) nounwind { ; RV32I-LABEL: ctpop_i64_ugt_one: ; RV32I: # %bb.0: ; RV32I-NEXT: addi a2, a0, -1 -; RV32I-NEXT: addi a3, a1, -1 -; RV32I-NEXT: sltiu a4, a2, -1 -; RV32I-NEXT: add a3, a3, a4 +; RV32I-NEXT: sltiu a3, a2, -1 +; RV32I-NEXT: addi a4, a1, -1 +; RV32I-NEXT: add a3, a4, a3 ; RV32I-NEXT: and a0, a0, a2 ; RV32I-NEXT: and a1, a1, a3 ; RV32I-NEXT: or a0, a0, a1 @@ -763,8 +763,8 @@ define i64 @abs_i64(i64 %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: srai a2, a1, 31 ; CHECK-NEXT: add a0, a0, a2 -; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: add a1, a1, a3 ; CHECK-NEXT: xor a0, a0, a2 ; CHECK-NEXT: xor a1, a1, a2 @@ -814,10 +814,10 @@ define i32 @bswap_i32(i32 %a) nounwind { ; 
RV32I-NEXT: srli a2, a0, 24 ; RV32I-NEXT: lui a3, 16 ; RV32I-NEXT: or a1, a2, a1 -; RV32I-NEXT: srli a2, a0, 8 -; RV32I-NEXT: addi a3, a3, -256 -; RV32I-NEXT: and a0, a0, a3 -; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: addi a2, a3, -256 +; RV32I-NEXT: srli a3, a0, 8 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: and a2, a3, a2 ; RV32I-NEXT: slli a0, a0, 8 ; RV32I-NEXT: or a1, a1, a2 ; RV32I-NEXT: or a0, a1, a0 @@ -839,21 +839,21 @@ define i64 @bswap_i64(i64 %a) { ; RV32I-NEXT: slli a2, a1, 24 ; RV32I-NEXT: srli a3, a1, 24 ; RV32I-NEXT: lui a4, 16 +; RV32I-NEXT: addi a4, a4, -256 ; RV32I-NEXT: srli a5, a1, 8 -; RV32I-NEXT: slli a6, a0, 24 ; RV32I-NEXT: or a2, a3, a2 -; RV32I-NEXT: srli a3, a0, 24 -; RV32I-NEXT: or a3, a3, a6 -; RV32I-NEXT: srli a6, a0, 8 -; RV32I-NEXT: addi a4, a4, -256 -; RV32I-NEXT: and a1, a1, a4 ; RV32I-NEXT: and a5, a5, a4 -; RV32I-NEXT: and a0, a0, a4 -; RV32I-NEXT: and a4, a6, a4 ; RV32I-NEXT: or a2, a2, a5 +; RV32I-NEXT: slli a3, a0, 24 +; RV32I-NEXT: srli a5, a0, 24 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: and a5, a0, a4 +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: and a0, a0, a4 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: slli a5, a0, 8 -; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a3, a3, a0 ; RV32I-NEXT: or a0, a2, a1 ; RV32I-NEXT: or a1, a3, a5 ; RV32I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbkb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbkb.ll index 55cb95413ae24..d7c450b6c21c5 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbkb.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbkb.ll @@ -110,9 +110,9 @@ define i32 @packh_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: packh_i32: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a2, 16 -; CHECK-NEXT: zext.b a0, a0 ; CHECK-NEXT: addi a2, a2, -256 ; CHECK-NEXT: slli a1, a1, 8 +; CHECK-NEXT: zext.b a0, a0 ; CHECK-NEXT: and a1, a1, a2 ; CHECK-NEXT: or a0, a1, a0 ; CHECK-NEXT: ret @@ -126,8 +126,8 @@ define i32 
@packh_i32(i32 %a, i32 %b) nounwind { define i32 @packh_i32_2(i32 %a, i32 %b) nounwind { ; RV32I-LABEL: packh_i32_2: ; RV32I: # %bb.0: -; RV32I-NEXT: zext.b a0, a0 ; RV32I-NEXT: zext.b a1, a1 +; RV32I-NEXT: zext.b a0, a0 ; RV32I-NEXT: slli a1, a1, 8 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: ret @@ -148,9 +148,9 @@ define i64 @packh_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: packh_i64: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, 16 -; CHECK-NEXT: zext.b a0, a0 ; CHECK-NEXT: addi a1, a1, -256 ; CHECK-NEXT: slli a2, a2, 8 +; CHECK-NEXT: zext.b a0, a0 ; CHECK-NEXT: and a1, a2, a1 ; CHECK-NEXT: or a0, a1, a0 ; CHECK-NEXT: li a1, 0 @@ -166,8 +166,8 @@ define i64 @packh_i64(i64 %a, i64 %b) nounwind { define i64 @packh_i64_2(i64 %a, i64 %b) nounwind { ; RV32I-LABEL: packh_i64_2: ; RV32I: # %bb.0: -; RV32I-NEXT: zext.b a0, a0 ; RV32I-NEXT: zext.b a1, a2 +; RV32I-NEXT: zext.b a0, a0 ; RV32I-NEXT: slli a2, a1, 8 ; RV32I-NEXT: srli a1, a1, 24 ; RV32I-NEXT: or a0, a2, a0 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zba.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zba.ll index 9c9c014e3c172..a24c186c22c85 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zba.ll @@ -426,8 +426,8 @@ entry: define i64 @sh6_sh3_add2(i64 noundef %x, i64 noundef %y, i64 noundef %z) { ; RV64I-LABEL: sh6_sh3_add2: ; RV64I: # %bb.0: # %entry -; RV64I-NEXT: slli a2, a2, 3 ; RV64I-NEXT: slli a1, a1, 6 +; RV64I-NEXT: slli a2, a2, 3 ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: ret @@ -479,8 +479,8 @@ define i64 @sh6_sh3_add4(i64 noundef %x, i64 noundef %y, i64 noundef %z) { ; ; RV64ZBA-LABEL: sh6_sh3_add4: ; RV64ZBA: # %bb.0: # %entry -; RV64ZBA-NEXT: slli a1, a1, 6 ; RV64ZBA-NEXT: sh3add a0, a2, a0 +; RV64ZBA-NEXT: slli a1, a1, 6 ; RV64ZBA-NEXT: add a0, a0, a1 ; RV64ZBA-NEXT: ret entry: @@ -1014,18 +1014,18 @@ define i64 @pack_i64(i64 %a, i64 %b) nounwind { define i64 @pack_i64_2(i32 signext %a, i32 signext %b) nounwind { 
; RV64I-LABEL: pack_i64_2: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; ; RV64ZBA-LABEL: pack_i64_2: ; RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: zext.w a0, a0 ; RV64ZBA-NEXT: zext.w a1, a1 +; RV64ZBA-NEXT: zext.w a0, a0 ; RV64ZBA-NEXT: slli a1, a1, 32 ; RV64ZBA-NEXT: or a0, a1, a0 ; RV64ZBA-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll index 2dd3bb3119dd3..cde14fe8cb1a1 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll @@ -16,33 +16,33 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind { ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srliw a1, a0, 1 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 4 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 8 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 16 ; RV64I-NEXT: lui a2, 349525 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: addi a1, a2, 1365 -; RV64I-NEXT: srliw a2, a0, 2 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: srliw a2, a0, 4 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: srliw a2, a0, 8 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: srliw a2, a0, 16 -; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: srliw a2, a0, 1 ; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addi a1, a2, 819 +; RV64I-NEXT: srliw a2, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: sraiw a1, a0, 4 +; 
RV64I-NEXT: lui a2, 61681 ; RV64I-NEXT: addw a0, a1, a0 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: addi a1, a2, -241 +; RV64I-NEXT: lui a2, 4112 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: addi a1, a2, 257 ; RV64I-NEXT: call __muldi3 ; RV64I-NEXT: srliw a0, a0, 24 ; RV64I-NEXT: li a1, 32 @@ -73,33 +73,33 @@ define signext i32 @log2_i32(i32 signext %a) nounwind { ; RV64I-NEXT: beqz a1, .LBB1_2 ; RV64I-NEXT: # %bb.1: # %cond.false ; RV64I-NEXT: srliw a1, a0, 1 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 4 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 8 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 16 ; RV64I-NEXT: lui a2, 349525 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: addi a1, a2, 1365 -; RV64I-NEXT: srliw a2, a0, 2 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: srliw a2, a0, 4 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: srliw a2, a0, 8 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: srliw a2, a0, 16 -; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: srliw a2, a0, 1 ; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addi a1, a2, 819 +; RV64I-NEXT: srliw a2, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: sraiw a1, a0, 4 +; RV64I-NEXT: lui a2, 61681 ; RV64I-NEXT: addw a0, a1, a0 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: addi a1, a2, -241 +; RV64I-NEXT: lui a2, 4112 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: addi a1, a2, 257 ; RV64I-NEXT: call __muldi3 ; RV64I-NEXT: srliw a0, a0, 24 ; RV64I-NEXT: li a1, 32 @@ -132,39 +132,39 @@ define 
signext i32 @log2_ceil_i32(i32 signext %a) nounwind { ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: li s0, 32 ; RV64I-NEXT: addiw a0, a0, -1 +; RV64I-NEXT: li s0, 32 ; RV64I-NEXT: li a1, 32 ; RV64I-NEXT: beqz a0, .LBB2_2 ; RV64I-NEXT: # %bb.1: # %cond.false ; RV64I-NEXT: srliw a1, a0, 1 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 4 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 8 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 16 ; RV64I-NEXT: lui a2, 349525 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: addi a1, a2, 1365 -; RV64I-NEXT: srliw a2, a0, 2 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: srliw a2, a0, 4 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: srliw a2, a0, 8 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: srliw a2, a0, 16 -; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: srliw a2, a0, 1 ; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addi a1, a2, 819 +; RV64I-NEXT: srliw a2, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: sraiw a1, a0, 4 +; RV64I-NEXT: lui a2, 61681 ; RV64I-NEXT: addw a0, a1, a0 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: addi a1, a2, -241 +; RV64I-NEXT: lui a2, 4112 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: addi a1, a2, 257 ; RV64I-NEXT: call __muldi3 ; RV64I-NEXT: srliw a0, a0, 24 ; RV64I-NEXT: li a1, 32 @@ -178,8 +178,8 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind { ; ; RV64ZBB-LABEL: log2_ceil_i32: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: li a1, 32 ; RV64ZBB-NEXT: addi a0, a0, -1 +; 
RV64ZBB-NEXT: li a1, 32 ; RV64ZBB-NEXT: clzw a0, a0 ; RV64ZBB-NEXT: subw a0, a1, a0 ; RV64ZBB-NEXT: ret @@ -197,33 +197,33 @@ define signext i32 @findLastSet_i32(i32 signext %a) nounwind { ; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: srliw a0, a0, 1 -; RV64I-NEXT: lui a1, 349525 ; RV64I-NEXT: or a0, s0, a0 -; RV64I-NEXT: addi a1, a1, 1365 -; RV64I-NEXT: srliw a2, a0, 2 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: srliw a2, a0, 4 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: srliw a2, a0, 8 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: srliw a2, a0, 16 -; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: srliw a1, a0, 2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 4 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 8 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 16 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: addi a1, a2, 1365 ; RV64I-NEXT: srliw a2, a0, 1 ; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addi a1, a2, 819 +; RV64I-NEXT: srliw a2, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: sraiw a1, a0, 4 +; RV64I-NEXT: lui a2, 61681 ; RV64I-NEXT: addw a0, a1, a0 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: addi a1, a2, -241 +; RV64I-NEXT: lui a2, 4112 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: addi a1, a2, 257 ; RV64I-NEXT: call __muldi3 ; RV64I-NEXT: beqz s0, .LBB3_2 ; RV64I-NEXT: # %bb.1: @@ -268,33 +268,33 @@ define i32 @ctlz_lshr_i32(i32 signext %a) { ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: .cfi_offset ra, -8 ; RV64I-NEXT: srliw a0, a0, 2 -; RV64I-NEXT: lui a2, 349525 ; RV64I-NEXT: or a0, a1, 
a0 +; RV64I-NEXT: srli a1, a0, 2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 8 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 16 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: addi a1, a2, 1365 -; RV64I-NEXT: srli a2, a0, 2 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: srli a2, a0, 4 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: srliw a2, a0, 8 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: srliw a2, a0, 16 -; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: srliw a2, a0, 1 ; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addi a1, a2, 819 +; RV64I-NEXT: srliw a2, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: sraiw a1, a0, 4 +; RV64I-NEXT: lui a2, 61681 ; RV64I-NEXT: addw a0, a1, a0 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: addi a1, a2, -241 +; RV64I-NEXT: lui a2, 4112 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: addi a1, a2, 257 ; RV64I-NEXT: call __muldi3 ; RV64I-NEXT: srliw a0, a0, 24 ; RV64I-NEXT: li a1, 32 @@ -341,36 +341,36 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV64I-NEXT: slli a1, a1, 12 ; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: addi a1, a1, 1365 -; RV64I-NEXT: srli a2, a0, 16 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: srli a2, a0, 32 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: srli a2, a0, 1 -; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: lui a2, 13107 +; RV64I-NEXT: srli a3, a0, 16 ; RV64I-NEXT: addi a2, a2, 819 +; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: srli a3, a0, 32 ; RV64I-NEXT: addi a2, a2, 819 +; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: 
srli a3, a0, 1 ; RV64I-NEXT: addi a2, a2, 819 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: srli a1, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: lui a2, 3855 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: add a0, a1, a0 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: lui a1, 3855 +; RV64I-NEXT: srli a3, a0, 2 +; RV64I-NEXT: addi a1, a1, 241 +; RV64I-NEXT: addi a2, a2, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: and a3, a3, a2 +; RV64I-NEXT: addi a1, a1, -241 ; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: add a0, a3, a0 +; RV64I-NEXT: addi a1, a1, 241 +; RV64I-NEXT: srli a2, a0, 4 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: slli a1, a0, 8 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: slli a1, a0, 16 @@ -411,19 +411,19 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind { ; RV64I-NEXT: srliw a2, a0, 1 ; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addi a1, a2, 819 +; RV64I-NEXT: srliw a2, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: sraiw a1, a0, 4 +; RV64I-NEXT: lui a2, 61681 ; RV64I-NEXT: addw a0, a1, a0 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: addi a1, a2, -241 +; RV64I-NEXT: lui a2, 4112 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: addi 
a1, a2, 257 ; RV64I-NEXT: call __muldi3 ; RV64I-NEXT: srliw a0, a0, 24 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -454,19 +454,19 @@ define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind { ; RV64I-NEXT: srliw a2, a0, 1 ; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addi a1, a2, 819 +; RV64I-NEXT: srliw a2, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: sraiw a1, a0, 4 +; RV64I-NEXT: lui a2, 61681 ; RV64I-NEXT: addw a0, a1, a0 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: addi a1, a2, -241 +; RV64I-NEXT: lui a2, 4112 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: addi a1, a2, 257 ; RV64I-NEXT: call __muldi3 ; RV64I-NEXT: srliw a0, a0, 24 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -496,19 +496,19 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind { ; RV64I-NEXT: srliw a2, a0, 1 ; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addi a1, a2, 819 +; RV64I-NEXT: srliw a2, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: sraiw a1, a0, 4 +; RV64I-NEXT: lui a2, 61681 ; RV64I-NEXT: addw a0, a1, a0 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: addi a1, a2, -241 +; RV64I-NEXT: lui a2, 4112 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: addi a1, a2, 257 ; RV64I-NEXT: call __muldi3 ; RV64I-NEXT: beqz s0, .LBB8_2 ; RV64I-NEXT: # %bb.1: 
@@ -552,19 +552,19 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind { ; RV64I-NEXT: srliw a2, a0, 1 ; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addi a1, a2, 819 +; RV64I-NEXT: srliw a2, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: sraiw a1, a0, 4 +; RV64I-NEXT: lui a2, 61681 ; RV64I-NEXT: addw a0, a1, a0 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: addi a1, a2, -241 +; RV64I-NEXT: lui a2, 4112 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: addi a1, a2, 257 ; RV64I-NEXT: call __muldi3 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: li a0, 0 @@ -602,42 +602,42 @@ define i64 @cttz_i64(i64 %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: beqz a0, .LBB10_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: not a1, a0 -; RV64I-NEXT: addi a0, a0, -1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: and a0, a1, a0 -; RV64I-NEXT: addi a1, a2, 1365 -; RV64I-NEXT: srli a2, a0, 1 +; RV64I-NEXT: lui a1, 21845 +; RV64I-NEXT: addi a1, a1, 1365 +; RV64I-NEXT: not a2, a0 ; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: addi a1, a1, 1365 +; RV64I-NEXT: lui a3, 13107 ; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: and a0, a2, a0 ; RV64I-NEXT: addi a1, a1, 1365 +; RV64I-NEXT: addi a2, a3, 819 ; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: srli a3, a0, 1 ; RV64I-NEXT: addi a1, a1, 1365 -; RV64I-NEXT: and a1, a2, a1 -; RV64I-NEXT: lui a2, 13107 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: lui a3, 3855 ; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 ; 
RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: srli a1, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: lui a2, 3855 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: add a0, a1, a0 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: add a0, a1, a0 -; RV64I-NEXT: addi a1, a2, -241 +; RV64I-NEXT: addi a1, a2, 819 +; RV64I-NEXT: srli a2, a0, 2 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a3, a3, 241 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a3, a3, 12 +; RV64I-NEXT: and a2, a2, a1 +; RV64I-NEXT: addi a3, a3, -241 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: slli a3, a3, 12 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: addi a1, a3, 241 +; RV64I-NEXT: srli a2, a0, 4 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: addi a1, a1, -241 ; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: slli a1, a0, 8 ; RV64I-NEXT: add a0, a0, a1 @@ -666,24 +666,24 @@ define signext i32 @ctpop_i32(i32 signext %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: srliw a1, a0, 1 -; RV64I-NEXT: lui a2, 349525 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, 349525 +; RV64I-NEXT: srliw a2, a0, 1 +; RV64I-NEXT: addi a1, a1, 1365 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addi a1, a2, 819 +; RV64I-NEXT: srliw a2, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: sraiw a1, a0, 4 +; RV64I-NEXT: lui a2, 61681 ; RV64I-NEXT: addw a0, a1, a0 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: and 
a0, a0, a2 -; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: addi a1, a2, -241 +; RV64I-NEXT: lui a2, 4112 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: addi a1, a2, 257 ; RV64I-NEXT: call __muldi3 ; RV64I-NEXT: srliw a0, a0, 24 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -727,19 +727,19 @@ define signext i32 @ctpop_i32_load(ptr %p) nounwind { ; RV64I-NEXT: srli a2, a0, 1 ; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addi a1, a2, 819 +; RV64I-NEXT: srliw a2, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: sraiw a1, a0, 4 +; RV64I-NEXT: lui a2, 61681 ; RV64I-NEXT: addw a0, a1, a0 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: addi a1, a2, -241 +; RV64I-NEXT: lui a2, 4112 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: addi a1, a2, 257 ; RV64I-NEXT: call __muldi3 ; RV64I-NEXT: srliw a0, a0, 24 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -763,37 +763,37 @@ define i64 @ctpop_i64(i64 %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: lui a2, 21845 +; RV64I-NEXT: lui a3, 13107 ; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: addi a3, a3, 819 ; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: slli a3, a3, 12 ; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: addi a3, a3, 819 ; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: slli a3, a3, 12 ; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: addi a3, a3, 819 ; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: slli a3, a3, 12 ; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: lui a2, 13107 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, 
a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: srli a1, a0, 2 -; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: addi a3, a3, 819 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: lui a2, 3855 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: add a0, a1, a0 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: add a0, a1, a0 -; RV64I-NEXT: addi a1, a2, -241 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: addi a1, a2, 241 +; RV64I-NEXT: srli a2, a0, 2 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: addi a1, a1, 241 +; RV64I-NEXT: srli a2, a0, 4 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: addi a1, a1, -241 ; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: slli a1, a0, 8 ; RV64I-NEXT: add a0, a0, a1 @@ -1125,10 +1125,10 @@ define signext i32 @bswap_i32(i32 signext %a) nounwind { ; RV64I-NEXT: srliw a2, a0, 24 ; RV64I-NEXT: lui a3, 16 ; RV64I-NEXT: or a1, a2, a1 -; RV64I-NEXT: srliw a2, a0, 8 -; RV64I-NEXT: addi a3, a3, -256 -; RV64I-NEXT: and a0, a0, a3 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: addi a2, a3, -256 +; RV64I-NEXT: srliw a3, a0, 8 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: slliw a0, a0, 8 ; RV64I-NEXT: or a1, a1, a2 ; RV64I-NEXT: or a0, a1, a0 @@ -1152,10 +1152,10 @@ define void @bswap_i32_nosext(i32 signext %a, ptr %x) nounwind { ; RV64I-NEXT: srliw a3, a0, 24 ; RV64I-NEXT: lui a4, 16 ; RV64I-NEXT: or a2, a3, a2 -; RV64I-NEXT: srliw a3, a0, 8 -; RV64I-NEXT: addi a4, a4, -256 -; RV64I-NEXT: and a0, a0, a4 -; RV64I-NEXT: and a3, a3, a4 +; RV64I-NEXT: addi a3, a4, -256 +; RV64I-NEXT: srliw a4, a0, 8 +; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: and a3, a4, 
a3 ; RV64I-NEXT: slli a0, a0, 8 ; RV64I-NEXT: or a2, a2, a3 ; RV64I-NEXT: or a0, a2, a0 @@ -1179,30 +1179,30 @@ define i64 @bswap_i64(i64 %a) { ; RV64I-LABEL: bswap_i64: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 56 -; RV64I-NEXT: srli a2, a0, 56 -; RV64I-NEXT: lui a3, 16 +; RV64I-NEXT: lui a2, 16 +; RV64I-NEXT: srli a3, a0, 56 +; RV64I-NEXT: addi a2, a2, -256 ; RV64I-NEXT: srli a4, a0, 40 -; RV64I-NEXT: or a1, a2, a1 -; RV64I-NEXT: lui a2, 4080 -; RV64I-NEXT: addi a3, a3, -256 -; RV64I-NEXT: and a4, a4, a3 -; RV64I-NEXT: or a1, a1, a4 -; RV64I-NEXT: srli a4, a0, 24 -; RV64I-NEXT: and a4, a4, a2 -; RV64I-NEXT: and a2, a0, a2 +; RV64I-NEXT: and a5, a0, a2 +; RV64I-NEXT: or a1, a3, a1 +; RV64I-NEXT: slli a5, a5, 40 +; RV64I-NEXT: and a2, a4, a2 +; RV64I-NEXT: lui a3, 4080 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: and a2, a0, a3 +; RV64I-NEXT: or a1, a1, a5 ; RV64I-NEXT: slli a2, a2, 24 +; RV64I-NEXT: srli a4, a0, 24 +; RV64I-NEXT: lui a5, 1044480 +; RV64I-NEXT: and a3, a4, a3 +; RV64I-NEXT: and a4, a0, a5 +; RV64I-NEXT: or a2, a2, a3 +; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: or a2, a2, a4 -; RV64I-NEXT: lui a4, 1044480 -; RV64I-NEXT: and a3, a0, a3 -; RV64I-NEXT: slli a3, a3, 40 -; RV64I-NEXT: or a1, a1, a3 -; RV64I-NEXT: srli a3, a0, 8 -; RV64I-NEXT: and a0, a0, a4 -; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: and a0, a0, a5 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: and a3, a3, a4 -; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: bswap_i64: diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbkb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbkb.ll index ba058ca0b500a..11527d70056a5 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbkb.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbkb.ll @@ -90,10 +90,10 @@ define i64 @pack_i64(i64 %a, i64 %b) nounwind { define i64 @pack_i64_2(i32 signext %a, i32 signext %b) nounwind { ; RV64I-LABEL: pack_i64_2: ; 
RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -140,9 +140,9 @@ define signext i32 @packh_i32(i32 signext %a, i32 signext %b) nounwind { ; RV64I-LABEL: packh_i32: ; RV64I: # %bb.0: ; RV64I-NEXT: lui a2, 16 -; RV64I-NEXT: zext.b a0, a0 ; RV64I-NEXT: addi a2, a2, -256 ; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: zext.b a0, a0 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -150,9 +150,9 @@ define signext i32 @packh_i32(i32 signext %a, i32 signext %b) nounwind { ; RV64ZBKB-LABEL: packh_i32: ; RV64ZBKB: # %bb.0: ; RV64ZBKB-NEXT: lui a2, 16 -; RV64ZBKB-NEXT: zext.b a0, a0 ; RV64ZBKB-NEXT: addi a2, a2, -256 ; RV64ZBKB-NEXT: slli a1, a1, 8 +; RV64ZBKB-NEXT: zext.b a0, a0 ; RV64ZBKB-NEXT: and a1, a1, a2 ; RV64ZBKB-NEXT: or a0, a1, a0 ; RV64ZBKB-NEXT: ret @@ -166,8 +166,8 @@ define signext i32 @packh_i32(i32 signext %a, i32 signext %b) nounwind { define i32 @packh_i32_2(i32 %a, i32 %b) nounwind { ; RV64I-LABEL: packh_i32_2: ; RV64I: # %bb.0: -; RV64I-NEXT: zext.b a0, a0 ; RV64I-NEXT: zext.b a1, a1 +; RV64I-NEXT: zext.b a0, a0 ; RV64I-NEXT: slli a1, a1, 8 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -188,9 +188,9 @@ define i64 @packh_i64(i64 %a, i64 %b) nounwind { ; RV64I-LABEL: packh_i64: ; RV64I: # %bb.0: ; RV64I-NEXT: lui a2, 16 -; RV64I-NEXT: zext.b a0, a0 ; RV64I-NEXT: addi a2, a2, -256 ; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: zext.b a0, a0 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -198,9 +198,9 @@ define i64 @packh_i64(i64 %a, i64 %b) nounwind { ; RV64ZBKB-LABEL: packh_i64: ; RV64ZBKB: # %bb.0: ; RV64ZBKB-NEXT: lui a2, 16 -; RV64ZBKB-NEXT: zext.b a0, a0 ; RV64ZBKB-NEXT: addi a2, a2, -256 ; RV64ZBKB-NEXT: slli a1, a1, 8 +; RV64ZBKB-NEXT: zext.b a0, a0 ; RV64ZBKB-NEXT: and 
a1, a1, a2 ; RV64ZBKB-NEXT: or a0, a1, a0 ; RV64ZBKB-NEXT: ret @@ -214,8 +214,8 @@ define i64 @packh_i64(i64 %a, i64 %b) nounwind { define i64 @packh_i64_2(i64 %a, i64 %b) nounwind { ; RV64I-LABEL: packh_i64_2: ; RV64I: # %bb.0: -; RV64I-NEXT: zext.b a0, a0 ; RV64I-NEXT: zext.b a1, a1 +; RV64I-NEXT: zext.b a0, a0 ; RV64I-NEXT: slli a1, a1, 8 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -300,9 +300,9 @@ define i64 @pack_i64_allWUsers(i32 signext %0, i32 signext %1, i32 signext %2) { ; RV64I-LABEL: pack_i64_allWUsers: ; RV64I: # %bb.0: ; RV64I-NEXT: add a0, a1, a0 -; RV64I-NEXT: slli a2, a2, 32 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: slli a2, a2, 32 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: srli a2, a2, 32 ; RV64I-NEXT: or a0, a0, a2 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll index daeb2e69c83bd..8e9318d736dde 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll @@ -134,14 +134,14 @@ define i64 @scmp.64.64(i64 %x, i64 %y) nounwind { ; RV32I-NEXT: beq a1, a3, .LBB6_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: slt a4, a3, a1 -; RV32I-NEXT: slt a1, a1, a3 +; RV32I-NEXT: slt a0, a1, a3 ; RV32I-NEXT: j .LBB6_3 ; RV32I-NEXT: .LBB6_2: ; RV32I-NEXT: sltu a4, a2, a0 -; RV32I-NEXT: sltu a1, a0, a2 +; RV32I-NEXT: sltu a0, a0, a2 ; RV32I-NEXT: .LBB6_3: -; RV32I-NEXT: sub a0, a4, a1 -; RV32I-NEXT: sltu a1, a4, a1 +; RV32I-NEXT: sltu a1, a4, a0 +; RV32I-NEXT: sub a0, a4, a0 ; RV32I-NEXT: neg a1, a1 ; RV32I-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll index 776eaedb2a5cc..33b9e8b0034b2 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll @@ -57,8 +57,8 @@ define i16 @test_lshr_i48_2(i48 %x, i48 %y) { ; ; RV64-LABEL: test_lshr_i48_2: ; RV64: # %bb.0: -; RV64-NEXT: andi a1, a1, 15 ; RV64-NEXT: slli a0, a0, 32 +; 
RV64-NEXT: andi a1, a1, 15 ; RV64-NEXT: srli a0, a0, 32 ; RV64-NEXT: srl a0, a0, a1 ; RV64-NEXT: ret @@ -110,8 +110,8 @@ define i32 @test_fshl_i32(i32 %x, i32 %_, i32 %y) { ; RV32-LABEL: test_fshl_i32: ; RV32: # %bb.0: ; RV32-NEXT: not a3, a2 -; RV32-NEXT: sll a0, a0, a2 ; RV32-NEXT: srli a1, a1, 1 +; RV32-NEXT: sll a0, a0, a2 ; RV32-NEXT: srl a1, a1, a3 ; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: ret @@ -119,8 +119,8 @@ define i32 @test_fshl_i32(i32 %x, i32 %_, i32 %y) { ; RV64-LABEL: test_fshl_i32: ; RV64: # %bb.0: ; RV64-NEXT: not a3, a2 -; RV64-NEXT: sllw a0, a0, a2 ; RV64-NEXT: srliw a1, a1, 1 +; RV64-NEXT: sllw a0, a0, a2 ; RV64-NEXT: srlw a1, a1, a3 ; RV64-NEXT: or a0, a0, a1 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/shifts.ll b/llvm/test/CodeGen/RISCV/GlobalISel/shifts.ll index d634cc9f6395c..f0d824742250e 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/shifts.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/shifts.ll @@ -20,10 +20,10 @@ define i64 @lshr64(i64 %a, i64 %b) nounwind { ; RV32I-NEXT: bnez a2, .LBB0_3 ; RV32I-NEXT: j .LBB0_4 ; RV32I-NEXT: .LBB0_2: -; RV32I-NEXT: srl a4, a0, a2 -; RV32I-NEXT: neg a5, a2 -; RV32I-NEXT: sll a5, a1, a5 -; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: neg a4, a2 +; RV32I-NEXT: srl a5, a0, a2 +; RV32I-NEXT: sll a4, a1, a4 +; RV32I-NEXT: or a4, a5, a4 ; RV32I-NEXT: beqz a2, .LBB0_4 ; RV32I-NEXT: .LBB0_3: ; RV32I-NEXT: mv a0, a4 @@ -54,10 +54,10 @@ define i64 @lshr64_minsize(i64 %a, i64 %b) minsize nounwind { ; RV32I-NEXT: bnez a2, .LBB1_3 ; RV32I-NEXT: j .LBB1_4 ; RV32I-NEXT: .LBB1_2: -; RV32I-NEXT: srl a4, a0, a2 -; RV32I-NEXT: neg a5, a2 -; RV32I-NEXT: sll a5, a1, a5 -; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: neg a4, a2 +; RV32I-NEXT: srl a5, a0, a2 +; RV32I-NEXT: sll a4, a1, a4 +; RV32I-NEXT: or a4, a5, a4 ; RV32I-NEXT: beqz a2, .LBB1_4 ; RV32I-NEXT: .LBB1_3: ; RV32I-NEXT: mv a0, a4 @@ -88,10 +88,10 @@ define i64 @ashr64(i64 %a, i64 %b) nounwind { ; RV32I-NEXT: bnez a2, .LBB2_3 ; RV32I-NEXT: j .LBB2_4 ; 
RV32I-NEXT: .LBB2_2: -; RV32I-NEXT: srl a4, a0, a2 -; RV32I-NEXT: neg a5, a2 -; RV32I-NEXT: sll a5, a1, a5 -; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: neg a4, a2 +; RV32I-NEXT: srl a5, a0, a2 +; RV32I-NEXT: sll a4, a1, a4 +; RV32I-NEXT: or a4, a5, a4 ; RV32I-NEXT: beqz a2, .LBB2_4 ; RV32I-NEXT: .LBB2_3: ; RV32I-NEXT: mv a0, a4 @@ -122,10 +122,10 @@ define i64 @ashr64_minsize(i64 %a, i64 %b) minsize nounwind { ; RV32I-NEXT: bnez a2, .LBB3_3 ; RV32I-NEXT: j .LBB3_4 ; RV32I-NEXT: .LBB3_2: -; RV32I-NEXT: srl a4, a0, a2 -; RV32I-NEXT: neg a5, a2 -; RV32I-NEXT: sll a5, a1, a5 -; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: neg a4, a2 +; RV32I-NEXT: srl a5, a0, a2 +; RV32I-NEXT: sll a4, a1, a4 +; RV32I-NEXT: or a4, a5, a4 ; RV32I-NEXT: beqz a2, .LBB3_4 ; RV32I-NEXT: .LBB3_3: ; RV32I-NEXT: mv a0, a4 @@ -158,11 +158,11 @@ define i64 @shl64(i64 %a, i64 %b) nounwind { ; RV32I-NEXT: bnez a2, .LBB4_3 ; RV32I-NEXT: j .LBB4_4 ; RV32I-NEXT: .LBB4_2: +; RV32I-NEXT: neg a0, a2 +; RV32I-NEXT: srl a4, a3, a0 +; RV32I-NEXT: sll a5, a1, a2 ; RV32I-NEXT: sll a0, a3, a2 -; RV32I-NEXT: neg a4, a2 -; RV32I-NEXT: srl a3, a3, a4 -; RV32I-NEXT: sll a4, a1, a2 -; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: or a3, a4, a5 ; RV32I-NEXT: beqz a2, .LBB4_4 ; RV32I-NEXT: .LBB4_3: ; RV32I-NEXT: mv a1, a3 @@ -189,11 +189,11 @@ define i64 @shl64_minsize(i64 %a, i64 %b) minsize nounwind { ; RV32I-NEXT: bnez a2, .LBB5_3 ; RV32I-NEXT: j .LBB5_4 ; RV32I-NEXT: .LBB5_2: +; RV32I-NEXT: neg a0, a2 +; RV32I-NEXT: srl a4, a3, a0 +; RV32I-NEXT: sll a5, a1, a2 ; RV32I-NEXT: sll a0, a3, a2 -; RV32I-NEXT: neg a4, a2 -; RV32I-NEXT: srl a3, a3, a4 -; RV32I-NEXT: sll a4, a1, a2 -; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: or a3, a4, a5 ; RV32I-NEXT: beqz a2, .LBB5_4 ; RV32I-NEXT: .LBB5_3: ; RV32I-NEXT: mv a1, a3 @@ -215,9 +215,9 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: lw a3, 8(a1) ; RV32I-NEXT: lw a7, 12(a1) ; RV32I-NEXT: li t0, 32 +; RV32I-NEXT: neg t4, a2 ; RV32I-NEXT: srl t2, a3, a2 -; RV32I-NEXT: 
neg t6, a2 -; RV32I-NEXT: sll t5, a7, t6 +; RV32I-NEXT: sll t6, a7, t4 ; RV32I-NEXT: bltu a2, t0, .LBB6_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: srl a5, a7, a2 @@ -225,7 +225,7 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: bnez a2, .LBB6_3 ; RV32I-NEXT: j .LBB6_4 ; RV32I-NEXT: .LBB6_2: -; RV32I-NEXT: or a5, t2, t5 +; RV32I-NEXT: or a5, t2, t6 ; RV32I-NEXT: mv a4, a3 ; RV32I-NEXT: beqz a2, .LBB6_4 ; RV32I-NEXT: .LBB6_3: @@ -236,19 +236,19 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: bltu a2, t0, .LBB6_6 ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: li a6, 0 -; RV32I-NEXT: srl t4, a1, a2 +; RV32I-NEXT: srl t5, a1, a2 ; RV32I-NEXT: j .LBB6_7 ; RV32I-NEXT: .LBB6_6: -; RV32I-NEXT: srl a6, a7, a2 ; RV32I-NEXT: srl t1, a5, a2 -; RV32I-NEXT: sll t3, a1, t6 -; RV32I-NEXT: or t4, t1, t3 +; RV32I-NEXT: sll t3, a1, t4 +; RV32I-NEXT: srl a6, a7, a2 +; RV32I-NEXT: or t5, t1, t3 ; RV32I-NEXT: .LBB6_7: ; RV32I-NEXT: li t1, 64 ; RV32I-NEXT: mv t3, a5 ; RV32I-NEXT: beqz a2, .LBB6_9 ; RV32I-NEXT: # %bb.8: -; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: mv t3, t5 ; RV32I-NEXT: .LBB6_9: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill @@ -257,26 +257,26 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: sub s0, t1, a2 ; RV32I-NEXT: bltu a2, t0, .LBB6_12 ; RV32I-NEXT: # %bb.10: -; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: bgeu s0, t0, .LBB6_13 ; RV32I-NEXT: .LBB6_11: -; RV32I-NEXT: sll t6, a3, t6 ; RV32I-NEXT: neg s1, s0 ; RV32I-NEXT: srl s1, a3, s1 -; RV32I-NEXT: or s2, s1, t5 +; RV32I-NEXT: sll t4, a3, t4 +; RV32I-NEXT: or s2, s1, t6 ; RV32I-NEXT: j .LBB6_14 ; RV32I-NEXT: .LBB6_12: -; RV32I-NEXT: srl t4, a1, a2 +; RV32I-NEXT: srl t5, a1, a2 ; RV32I-NEXT: bltu s0, t0, .LBB6_11 ; RV32I-NEXT: .LBB6_13: -; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: li t4, 0 ; RV32I-NEXT: sll s2, a3, s0 ; RV32I-NEXT: .LBB6_14: ; RV32I-NEXT: addi s1, a2, -64 -; RV32I-NEXT: mv t5, a7 +; RV32I-NEXT: mv t6, a7 ; RV32I-NEXT: beqz 
s0, .LBB6_16 ; RV32I-NEXT: # %bb.15: -; RV32I-NEXT: mv t5, s2 +; RV32I-NEXT: mv t6, s2 ; RV32I-NEXT: .LBB6_16: ; RV32I-NEXT: bltu s1, t0, .LBB6_18 ; RV32I-NEXT: # %bb.17: @@ -300,8 +300,8 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: srl a7, a7, a2 ; RV32I-NEXT: bgeu a2, t1, .LBB6_24 ; RV32I-NEXT: .LBB6_23: -; RV32I-NEXT: or a3, t3, t6 -; RV32I-NEXT: or a7, t4, t5 +; RV32I-NEXT: or a3, t3, t4 +; RV32I-NEXT: or a7, t5, t6 ; RV32I-NEXT: .LBB6_24: ; RV32I-NEXT: bnez a2, .LBB6_28 ; RV32I-NEXT: # %bb.25: @@ -335,10 +335,10 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind { ; RV64I-NEXT: bnez a2, .LBB6_3 ; RV64I-NEXT: j .LBB6_4 ; RV64I-NEXT: .LBB6_2: -; RV64I-NEXT: srl a4, a0, a2 -; RV64I-NEXT: neg a5, a2 -; RV64I-NEXT: sll a5, a1, a5 -; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: neg a4, a2 +; RV64I-NEXT: srl a5, a0, a2 +; RV64I-NEXT: sll a4, a1, a4 +; RV64I-NEXT: or a4, a5, a4 ; RV64I-NEXT: beqz a2, .LBB6_4 ; RV64I-NEXT: .LBB6_3: ; RV64I-NEXT: mv a0, a4 @@ -361,9 +361,9 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: lw a4, 8(a1) ; RV32I-NEXT: lw a3, 12(a1) ; RV32I-NEXT: li t0, 32 +; RV32I-NEXT: neg t5, a2 ; RV32I-NEXT: srl t2, a4, a2 -; RV32I-NEXT: neg t6, a2 -; RV32I-NEXT: sll t5, a3, t6 +; RV32I-NEXT: sll t6, a3, t5 ; RV32I-NEXT: bltu a2, t0, .LBB7_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sra a6, a3, a2 @@ -371,7 +371,7 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: bnez a2, .LBB7_3 ; RV32I-NEXT: j .LBB7_4 ; RV32I-NEXT: .LBB7_2: -; RV32I-NEXT: or a6, t2, t5 +; RV32I-NEXT: or a6, t2, t6 ; RV32I-NEXT: mv a5, a4 ; RV32I-NEXT: beqz a2, .LBB7_4 ; RV32I-NEXT: .LBB7_3: @@ -385,9 +385,9 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: srl t4, a1, a2 ; RV32I-NEXT: j .LBB7_7 ; RV32I-NEXT: .LBB7_6: -; RV32I-NEXT: sra a7, a3, a2 ; RV32I-NEXT: srl t1, a6, a2 -; RV32I-NEXT: sll t3, a1, t6 +; RV32I-NEXT: sll t3, a1, t5 +; RV32I-NEXT: sra a7, a3, a2 ; RV32I-NEXT: or t4, t1, t3 ; RV32I-NEXT: .LBB7_7: ; 
RV32I-NEXT: li t1, 64 @@ -406,23 +406,23 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: li t4, 0 ; RV32I-NEXT: bgeu s0, t0, .LBB7_13 ; RV32I-NEXT: .LBB7_11: -; RV32I-NEXT: sll t6, a4, t6 ; RV32I-NEXT: neg s1, s0 ; RV32I-NEXT: srl s1, a4, s1 -; RV32I-NEXT: or s2, s1, t5 +; RV32I-NEXT: sll t5, a4, t5 +; RV32I-NEXT: or s2, s1, t6 ; RV32I-NEXT: j .LBB7_14 ; RV32I-NEXT: .LBB7_12: ; RV32I-NEXT: srl t4, a1, a2 ; RV32I-NEXT: bltu s0, t0, .LBB7_11 ; RV32I-NEXT: .LBB7_13: -; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: sll s2, a4, s0 ; RV32I-NEXT: .LBB7_14: ; RV32I-NEXT: addi s1, a2, -64 -; RV32I-NEXT: mv t5, a3 +; RV32I-NEXT: mv t6, a3 ; RV32I-NEXT: beqz s0, .LBB7_16 ; RV32I-NEXT: # %bb.15: -; RV32I-NEXT: mv t5, s2 +; RV32I-NEXT: mv t6, s2 ; RV32I-NEXT: .LBB7_16: ; RV32I-NEXT: bltu s1, t0, .LBB7_18 ; RV32I-NEXT: # %bb.17: @@ -446,8 +446,8 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: sra t0, a3, a2 ; RV32I-NEXT: bgeu a2, t1, .LBB7_24 ; RV32I-NEXT: .LBB7_23: -; RV32I-NEXT: or a4, t3, t6 -; RV32I-NEXT: or t0, t4, t5 +; RV32I-NEXT: or a4, t3, t5 +; RV32I-NEXT: or t0, t4, t6 ; RV32I-NEXT: .LBB7_24: ; RV32I-NEXT: bnez a2, .LBB7_28 ; RV32I-NEXT: # %bb.25: @@ -481,10 +481,10 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind { ; RV64I-NEXT: bnez a2, .LBB7_3 ; RV64I-NEXT: j .LBB7_4 ; RV64I-NEXT: .LBB7_2: -; RV64I-NEXT: srl a4, a0, a2 -; RV64I-NEXT: neg a5, a2 -; RV64I-NEXT: sll a5, a1, a5 -; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: neg a4, a2 +; RV64I-NEXT: srl a5, a0, a2 +; RV64I-NEXT: sll a4, a1, a4 +; RV64I-NEXT: or a4, a5, a4 ; RV64I-NEXT: beqz a2, .LBB7_4 ; RV64I-NEXT: .LBB7_3: ; RV64I-NEXT: mv a0, a4 @@ -555,20 +555,20 @@ define i128 @shl128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: lw a1, 12(a1) ; RV32I-NEXT: bltu a2, t1, .LBB8_14 ; RV32I-NEXT: # %bb.13: -; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: sll s1, t2, a2 ; RV32I-NEXT: j .LBB8_15 ; RV32I-NEXT: .LBB8_14: -; RV32I-NEXT: sll t6, t2, a2 -; 
RV32I-NEXT: srl t5, t2, t5 +; RV32I-NEXT: srl t6, t2, t5 ; RV32I-NEXT: sll s0, a1, a2 -; RV32I-NEXT: or s1, t5, s0 +; RV32I-NEXT: sll t5, t2, a2 +; RV32I-NEXT: or s1, t6, s0 ; RV32I-NEXT: .LBB8_15: ; RV32I-NEXT: addi s0, a2, -64 -; RV32I-NEXT: mv t5, a1 +; RV32I-NEXT: mv t6, a1 ; RV32I-NEXT: beqz a2, .LBB8_17 ; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: mv t5, s1 +; RV32I-NEXT: mv t6, s1 ; RV32I-NEXT: .LBB8_17: ; RV32I-NEXT: bltu s0, t1, .LBB8_19 ; RV32I-NEXT: # %bb.18: @@ -577,10 +577,10 @@ define i128 @shl128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: bnez s0, .LBB8_20 ; RV32I-NEXT: j .LBB8_21 ; RV32I-NEXT: .LBB8_19: +; RV32I-NEXT: neg t1, s0 +; RV32I-NEXT: srl s1, a7, t1 ; RV32I-NEXT: sll t1, a7, a2 -; RV32I-NEXT: neg s1, s0 -; RV32I-NEXT: srl a7, a7, s1 -; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: or a7, s1, t0 ; RV32I-NEXT: beqz s0, .LBB8_21 ; RV32I-NEXT: .LBB8_20: ; RV32I-NEXT: mv a3, a7 @@ -592,8 +592,8 @@ define i128 @shl128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: bnez a2, .LBB8_24 ; RV32I-NEXT: j .LBB8_25 ; RV32I-NEXT: .LBB8_23: -; RV32I-NEXT: or t1, t3, t6 -; RV32I-NEXT: or a3, t4, t5 +; RV32I-NEXT: or t1, t3, t5 +; RV32I-NEXT: or a3, t4, t6 ; RV32I-NEXT: beqz a2, .LBB8_25 ; RV32I-NEXT: .LBB8_24: ; RV32I-NEXT: mv t2, t1 @@ -620,11 +620,11 @@ define i128 @shl128(i128 %a, i128 %b) nounwind { ; RV64I-NEXT: bnez a2, .LBB8_3 ; RV64I-NEXT: j .LBB8_4 ; RV64I-NEXT: .LBB8_2: +; RV64I-NEXT: neg a0, a2 +; RV64I-NEXT: srl a4, a3, a0 +; RV64I-NEXT: sll a5, a1, a2 ; RV64I-NEXT: sll a0, a3, a2 -; RV64I-NEXT: neg a4, a2 -; RV64I-NEXT: srl a3, a3, a4 -; RV64I-NEXT: sll a4, a1, a2 -; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: or a3, a4, a5 ; RV64I-NEXT: beqz a2, .LBB8_4 ; RV64I-NEXT: .LBB8_3: ; RV64I-NEXT: mv a1, a3 @@ -644,10 +644,10 @@ define i64 @fshr64_minsize(i64 %a, i64 %b) minsize nounwind { ; RV32I-NEXT: srl a6, a1, a5 ; RV32I-NEXT: j .LBB9_3 ; RV32I-NEXT: .LBB9_2: -; RV32I-NEXT: srl a3, a0, a2 -; RV32I-NEXT: neg a6, a5 -; RV32I-NEXT: sll a6, a1, a6 -; RV32I-NEXT: or a6, 
a3, a6 +; RV32I-NEXT: neg a3, a5 +; RV32I-NEXT: srl a6, a0, a2 +; RV32I-NEXT: sll a3, a1, a3 +; RV32I-NEXT: or a6, a6, a3 ; RV32I-NEXT: .LBB9_3: ; RV32I-NEXT: mv a3, a0 ; RV32I-NEXT: beqz a5, .LBB9_5 @@ -670,11 +670,11 @@ define i64 @fshr64_minsize(i64 %a, i64 %b) minsize nounwind { ; RV32I-NEXT: bnez a5, .LBB9_11 ; RV32I-NEXT: j .LBB9_12 ; RV32I-NEXT: .LBB9_10: +; RV32I-NEXT: neg a4, a5 +; RV32I-NEXT: srl a7, a0, a4 +; RV32I-NEXT: sll t0, a1, a6 ; RV32I-NEXT: sll a4, a0, a6 -; RV32I-NEXT: neg a7, a5 -; RV32I-NEXT: srl a0, a0, a7 -; RV32I-NEXT: sll a6, a1, a6 -; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: or a0, a7, t0 ; RV32I-NEXT: beqz a5, .LBB9_12 ; RV32I-NEXT: .LBB9_11: ; RV32I-NEXT: mv a1, a0 @@ -698,21 +698,21 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind { ; RV32I-LABEL: fshr128_minsize: ; RV32I: # %bb.0: ; RV32I-NEXT: lw t3, 0(a2) -; RV32I-NEXT: lw a2, 8(a1) -; RV32I-NEXT: lw a3, 12(a1) +; RV32I-NEXT: lw a2, 12(a1) +; RV32I-NEXT: lw a3, 8(a1) ; RV32I-NEXT: andi t4, t3, 127 ; RV32I-NEXT: li a6, 32 ; RV32I-NEXT: neg t6, t4 -; RV32I-NEXT: sll t5, a3, t6 +; RV32I-NEXT: sll t5, a2, t6 ; RV32I-NEXT: bltu t4, a6, .LBB10_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srl a5, a3, t4 +; RV32I-NEXT: srl a5, a2, t4 ; RV32I-NEXT: j .LBB10_3 ; RV32I-NEXT: .LBB10_2: -; RV32I-NEXT: srl a4, a2, t3 +; RV32I-NEXT: srl a4, a3, t3 ; RV32I-NEXT: or a5, a4, t5 ; RV32I-NEXT: .LBB10_3: -; RV32I-NEXT: mv a4, a2 +; RV32I-NEXT: mv a4, a3 ; RV32I-NEXT: beqz t4, .LBB10_5 ; RV32I-NEXT: # %bb.4: ; RV32I-NEXT: mv a4, a5 @@ -725,9 +725,9 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind { ; RV32I-NEXT: srl t2, a5, t4 ; RV32I-NEXT: j .LBB10_8 ; RV32I-NEXT: .LBB10_7: -; RV32I-NEXT: srl a1, a3, t3 ; RV32I-NEXT: srl t0, a7, t3 ; RV32I-NEXT: sll t1, a5, t6 +; RV32I-NEXT: srl a1, a2, t3 ; RV32I-NEXT: or t2, t0, t1 ; RV32I-NEXT: .LBB10_8: ; RV32I-NEXT: li t0, 64 @@ -742,15 +742,16 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind { ; RV32I-NEXT: sw s2, 
20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sub s0, t0, t4 ; RV32I-NEXT: bltu t4, a6, .LBB10_13 ; RV32I-NEXT: # %bb.11: ; RV32I-NEXT: li t2, 0 ; RV32I-NEXT: bgeu s0, a6, .LBB10_14 ; RV32I-NEXT: .LBB10_12: -; RV32I-NEXT: sll t6, a2, t6 ; RV32I-NEXT: neg s1, s0 -; RV32I-NEXT: srl s1, a2, s1 +; RV32I-NEXT: srl s1, a3, s1 +; RV32I-NEXT: sll t6, a3, t6 ; RV32I-NEXT: or s2, s1, t5 ; RV32I-NEXT: j .LBB10_15 ; RV32I-NEXT: .LBB10_13: @@ -758,25 +759,25 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind { ; RV32I-NEXT: bltu s0, a6, .LBB10_12 ; RV32I-NEXT: .LBB10_14: ; RV32I-NEXT: li t6, 0 -; RV32I-NEXT: sll s2, a2, s0 +; RV32I-NEXT: sll s2, a3, s0 ; RV32I-NEXT: .LBB10_15: ; RV32I-NEXT: addi s1, t4, -64 -; RV32I-NEXT: mv t5, a3 +; RV32I-NEXT: mv t5, a2 ; RV32I-NEXT: beqz s0, .LBB10_17 ; RV32I-NEXT: # %bb.16: ; RV32I-NEXT: mv t5, s2 ; RV32I-NEXT: .LBB10_17: ; RV32I-NEXT: bltu s1, a6, .LBB10_19 ; RV32I-NEXT: # %bb.18: -; RV32I-NEXT: srl s2, a3, s1 +; RV32I-NEXT: srl s2, a2, s1 ; RV32I-NEXT: j .LBB10_20 ; RV32I-NEXT: .LBB10_19: -; RV32I-NEXT: srl s0, a2, t4 -; RV32I-NEXT: neg s2, s1 -; RV32I-NEXT: sll s2, a3, s2 -; RV32I-NEXT: or s2, s0, s2 +; RV32I-NEXT: neg s0, s1 +; RV32I-NEXT: srl s2, a3, t4 +; RV32I-NEXT: sll s0, a2, s0 +; RV32I-NEXT: or s2, s2, s0 ; RV32I-NEXT: .LBB10_20: -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a3 ; RV32I-NEXT: beqz s1, .LBB10_22 ; RV32I-NEXT: # %bb.21: ; RV32I-NEXT: mv s0, s2 @@ -787,7 +788,7 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind { ; RV32I-NEXT: bltu t4, t0, .LBB10_25 ; RV32I-NEXT: j .LBB10_26 ; RV32I-NEXT: .LBB10_24: -; RV32I-NEXT: srl s1, a3, t4 +; RV32I-NEXT: srl s1, a2, t4 ; RV32I-NEXT: bgeu t4, t0, .LBB10_26 ; RV32I-NEXT: .LBB10_25: ; RV32I-NEXT: or s0, t1, t6 @@ -815,8 +816,8 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind { ; 
RV32I-NEXT: sll s3, a7, t3 ; RV32I-NEXT: j .LBB10_33 ; RV32I-NEXT: .LBB10_32: -; RV32I-NEXT: sll t4, a7, t6 ; RV32I-NEXT: sll t5, a5, t6 +; RV32I-NEXT: sll t4, a7, t6 ; RV32I-NEXT: or s3, s0, t5 ; RV32I-NEXT: .LBB10_33: ; RV32I-NEXT: sub s1, t0, t3 @@ -844,37 +845,37 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind { ; RV32I-NEXT: li s1, 0 ; RV32I-NEXT: bgeu t3, a6, .LBB10_44 ; RV32I-NEXT: .LBB10_42: +; RV32I-NEXT: srl s2, a3, s2 ; RV32I-NEXT: sll s3, a2, t6 -; RV32I-NEXT: srl s2, a2, s2 ; RV32I-NEXT: sll t6, a3, t6 -; RV32I-NEXT: or s4, s2, t6 +; RV32I-NEXT: or s4, s2, s3 ; RV32I-NEXT: j .LBB10_45 ; RV32I-NEXT: .LBB10_43: ; RV32I-NEXT: srl s1, a5, s2 ; RV32I-NEXT: bltu t3, a6, .LBB10_42 ; RV32I-NEXT: .LBB10_44: -; RV32I-NEXT: li s3, 0 -; RV32I-NEXT: sll s4, a2, t3 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: sll s4, a3, t3 ; RV32I-NEXT: .LBB10_45: -; RV32I-NEXT: addi s2, t3, -64 -; RV32I-NEXT: mv t6, a3 +; RV32I-NEXT: addi s3, t3, -64 +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: beqz t3, .LBB10_47 ; RV32I-NEXT: # %bb.46: -; RV32I-NEXT: mv t6, s4 +; RV32I-NEXT: mv s2, s4 ; RV32I-NEXT: .LBB10_47: -; RV32I-NEXT: bltu s2, a6, .LBB10_49 +; RV32I-NEXT: bltu s3, a6, .LBB10_49 ; RV32I-NEXT: # %bb.48: ; RV32I-NEXT: li a6, 0 -; RV32I-NEXT: sll a7, a7, s2 -; RV32I-NEXT: bnez s2, .LBB10_50 +; RV32I-NEXT: sll a7, a7, s3 +; RV32I-NEXT: bnez s3, .LBB10_50 ; RV32I-NEXT: j .LBB10_51 ; RV32I-NEXT: .LBB10_49: +; RV32I-NEXT: neg a6, s3 +; RV32I-NEXT: srl s4, a7, a6 +; RV32I-NEXT: sll s5, a5, t3 ; RV32I-NEXT: sll a6, a7, t3 -; RV32I-NEXT: neg s4, s2 -; RV32I-NEXT: srl a7, a7, s4 -; RV32I-NEXT: sll s4, a5, t3 -; RV32I-NEXT: or a7, a7, s4 -; RV32I-NEXT: beqz s2, .LBB10_51 +; RV32I-NEXT: or a7, s4, s5 +; RV32I-NEXT: beqz s3, .LBB10_51 ; RV32I-NEXT: .LBB10_50: ; RV32I-NEXT: mv a5, a7 ; RV32I-NEXT: .LBB10_51: @@ -885,26 +886,27 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind { ; RV32I-NEXT: bnez t3, .LBB10_54 ; RV32I-NEXT: j .LBB10_55 ; RV32I-NEXT: .LBB10_53: -; 
RV32I-NEXT: or a6, s0, s3 -; RV32I-NEXT: or a5, s1, t6 +; RV32I-NEXT: or a6, s0, t6 +; RV32I-NEXT: or a5, s1, s2 ; RV32I-NEXT: beqz t3, .LBB10_55 ; RV32I-NEXT: .LBB10_54: -; RV32I-NEXT: mv a2, a6 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a6 +; RV32I-NEXT: mv a2, a5 ; RV32I-NEXT: .LBB10_55: ; RV32I-NEXT: or a5, t1, t4 ; RV32I-NEXT: or a6, t2, t5 -; RV32I-NEXT: or a2, a4, a2 -; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a1, a1, a2 ; RV32I-NEXT: sw a5, 0(a0) ; RV32I-NEXT: sw a6, 4(a0) -; RV32I-NEXT: sw a2, 8(a0) +; RV32I-NEXT: sw a3, 8(a0) ; RV32I-NEXT: sw a1, 12(a0) ; RV32I-NEXT: lw s0, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s4, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; @@ -918,17 +920,17 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind { ; RV64I-NEXT: srl a6, a1, a3 ; RV64I-NEXT: j .LBB10_3 ; RV64I-NEXT: .LBB10_2: -; RV64I-NEXT: srl a3, a0, a2 -; RV64I-NEXT: neg a6, a5 -; RV64I-NEXT: sll a6, a1, a6 -; RV64I-NEXT: or a6, a3, a6 +; RV64I-NEXT: neg a3, a5 +; RV64I-NEXT: srl a6, a0, a2 +; RV64I-NEXT: sll a3, a1, a3 +; RV64I-NEXT: or a6, a6, a3 ; RV64I-NEXT: .LBB10_3: ; RV64I-NEXT: mv a3, a0 ; RV64I-NEXT: beqz a5, .LBB10_5 ; RV64I-NEXT: # %bb.4: ; RV64I-NEXT: mv a3, a6 ; RV64I-NEXT: .LBB10_5: -; RV64I-NEXT: neg a7, a2 +; RV64I-NEXT: neg a6, a2 ; RV64I-NEXT: bltu a5, a4, .LBB10_7 ; RV64I-NEXT: # %bb.6: ; RV64I-NEXT: li a2, 0 @@ -936,25 +938,25 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind { ; RV64I-NEXT: .LBB10_7: ; RV64I-NEXT: srl a2, a1, a2 ; RV64I-NEXT: .LBB10_8: -; RV64I-NEXT: andi a6, a7, 127 -; RV64I-NEXT: bltu a6, a4, .LBB10_10 +; RV64I-NEXT: andi a5, a6, 127 +; RV64I-NEXT: bltu a5, a4, .LBB10_10 ; RV64I-NEXT: # %bb.9: -; RV64I-NEXT: li a5, 0 -; 
RV64I-NEXT: sub a4, a6, a4 +; RV64I-NEXT: li a6, 0 +; RV64I-NEXT: sub a4, a5, a4 ; RV64I-NEXT: sll a0, a0, a4 -; RV64I-NEXT: bnez a6, .LBB10_11 +; RV64I-NEXT: bnez a5, .LBB10_11 ; RV64I-NEXT: j .LBB10_12 ; RV64I-NEXT: .LBB10_10: -; RV64I-NEXT: sll a5, a0, a7 -; RV64I-NEXT: neg a4, a6 -; RV64I-NEXT: srl a0, a0, a4 -; RV64I-NEXT: sll a4, a1, a7 -; RV64I-NEXT: or a0, a0, a4 -; RV64I-NEXT: beqz a6, .LBB10_12 +; RV64I-NEXT: neg a4, a5 +; RV64I-NEXT: srl a4, a0, a4 +; RV64I-NEXT: sll a7, a1, a6 +; RV64I-NEXT: sll a6, a0, a6 +; RV64I-NEXT: or a0, a4, a7 +; RV64I-NEXT: beqz a5, .LBB10_12 ; RV64I-NEXT: .LBB10_11: ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: .LBB10_12: -; RV64I-NEXT: or a0, a3, a5 +; RV64I-NEXT: or a0, a3, a6 ; RV64I-NEXT: or a1, a2, a1 ; RV64I-NEXT: ret %res = tail call i128 @llvm.fshr.i128(i128 %a, i128 %a, i128 %b) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll index 463883b371caf..7fd178d8f0e40 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll @@ -172,14 +172,14 @@ define i64 @ucmp.64.64(i64 %x, i64 %y) nounwind { ; RV32I-NEXT: beq a1, a3, .LBB8_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sltu a4, a3, a1 -; RV32I-NEXT: sltu a1, a1, a3 +; RV32I-NEXT: sltu a0, a1, a3 ; RV32I-NEXT: j .LBB8_3 ; RV32I-NEXT: .LBB8_2: ; RV32I-NEXT: sltu a4, a2, a0 -; RV32I-NEXT: sltu a1, a0, a2 +; RV32I-NEXT: sltu a0, a0, a2 ; RV32I-NEXT: .LBB8_3: -; RV32I-NEXT: sub a0, a4, a1 -; RV32I-NEXT: sltu a1, a4, a1 +; RV32I-NEXT: sltu a1, a4, a0 +; RV32I-NEXT: sub a0, a4, a0 ; RV32I-NEXT: neg a1, a1 ; RV32I-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll b/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll index bb96ba7e5b1fb..02c2afbf646d4 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll @@ -45,17 +45,17 @@ define i32 @va1(ptr %fmt, ...) 
{ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: addi a0, sp, 20 +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) ; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: sw a2, 24(sp) ; RV32-NEXT: sw a3, 28(sp) ; RV32-NEXT: sw a4, 32(sp) -; RV32-NEXT: addi a0, sp, 20 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi a1, a0, 4 ; RV32-NEXT: sw a5, 36(sp) ; RV32-NEXT: sw a6, 40(sp) ; RV32-NEXT: sw a7, 44(sp) -; RV32-NEXT: addi a1, a0, 4 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lw a0, 0(a0) ; RV32-NEXT: addi sp, sp, 48 @@ -66,21 +66,21 @@ define i32 @va1(ptr %fmt, ...) { ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -80 ; RV64-NEXT: .cfi_def_cfa_offset 80 +; RV64-NEXT: addi a0, sp, 24 +; RV64-NEXT: addi t0, sp, 8 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lw a0, 4(t0) +; RV64-NEXT: lwu t0, 8(sp) +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: or a0, a0, t0 ; RV64-NEXT: sd a1, 24(sp) ; RV64-NEXT: sd a2, 32(sp) ; RV64-NEXT: sd a3, 40(sp) ; RV64-NEXT: sd a4, 48(sp) -; RV64-NEXT: addi a0, sp, 8 -; RV64-NEXT: addi a1, sp, 24 -; RV64-NEXT: sd a1, 8(sp) -; RV64-NEXT: lw a0, 4(a0) -; RV64-NEXT: lwu a1, 8(sp) +; RV64-NEXT: addi a1, a0, 4 ; RV64-NEXT: sd a5, 56(sp) ; RV64-NEXT: sd a6, 64(sp) ; RV64-NEXT: sd a7, 72(sp) -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: addi a1, a0, 4 ; RV64-NEXT: srli a2, a1, 32 ; RV64-NEXT: sw a1, 8(sp) ; RV64-NEXT: sw a2, 12(sp) @@ -99,17 +99,17 @@ define i32 @va1(ptr %fmt, ...) 
{ ; RV32-WITHFP-NEXT: .cfi_offset s0, -40 ; RV32-WITHFP-NEXT: addi s0, sp, 16 ; RV32-WITHFP-NEXT: .cfi_def_cfa s0, 32 +; RV32-WITHFP-NEXT: addi a0, s0, 4 +; RV32-WITHFP-NEXT: sw a0, -12(s0) +; RV32-WITHFP-NEXT: lw a0, -12(s0) ; RV32-WITHFP-NEXT: sw a1, 4(s0) ; RV32-WITHFP-NEXT: sw a2, 8(s0) ; RV32-WITHFP-NEXT: sw a3, 12(s0) ; RV32-WITHFP-NEXT: sw a4, 16(s0) -; RV32-WITHFP-NEXT: addi a0, s0, 4 -; RV32-WITHFP-NEXT: sw a0, -12(s0) -; RV32-WITHFP-NEXT: lw a0, -12(s0) +; RV32-WITHFP-NEXT: addi a1, a0, 4 ; RV32-WITHFP-NEXT: sw a5, 20(s0) ; RV32-WITHFP-NEXT: sw a6, 24(s0) ; RV32-WITHFP-NEXT: sw a7, 28(s0) -; RV32-WITHFP-NEXT: addi a1, a0, 4 ; RV32-WITHFP-NEXT: sw a1, -12(s0) ; RV32-WITHFP-NEXT: lw a0, 0(a0) ; RV32-WITHFP-NEXT: .cfi_def_cfa sp, 48 @@ -131,21 +131,21 @@ define i32 @va1(ptr %fmt, ...) { ; RV64-WITHFP-NEXT: .cfi_offset s0, -80 ; RV64-WITHFP-NEXT: addi s0, sp, 32 ; RV64-WITHFP-NEXT: .cfi_def_cfa s0, 64 +; RV64-WITHFP-NEXT: addi a0, s0, 8 +; RV64-WITHFP-NEXT: addi t0, s0, -24 +; RV64-WITHFP-NEXT: sd a0, -24(s0) +; RV64-WITHFP-NEXT: lw a0, 4(t0) +; RV64-WITHFP-NEXT: lwu t0, -24(s0) +; RV64-WITHFP-NEXT: slli a0, a0, 32 +; RV64-WITHFP-NEXT: or a0, a0, t0 ; RV64-WITHFP-NEXT: sd a1, 8(s0) ; RV64-WITHFP-NEXT: sd a2, 16(s0) ; RV64-WITHFP-NEXT: sd a3, 24(s0) ; RV64-WITHFP-NEXT: sd a4, 32(s0) -; RV64-WITHFP-NEXT: addi a0, s0, -24 -; RV64-WITHFP-NEXT: addi a1, s0, 8 -; RV64-WITHFP-NEXT: sd a1, -24(s0) -; RV64-WITHFP-NEXT: lw a0, 4(a0) -; RV64-WITHFP-NEXT: lwu a1, -24(s0) +; RV64-WITHFP-NEXT: addi a1, a0, 4 ; RV64-WITHFP-NEXT: sd a5, 40(s0) ; RV64-WITHFP-NEXT: sd a6, 48(s0) ; RV64-WITHFP-NEXT: sd a7, 56(s0) -; RV64-WITHFP-NEXT: slli a0, a0, 32 -; RV64-WITHFP-NEXT: or a0, a0, a1 -; RV64-WITHFP-NEXT: addi a1, a0, 4 ; RV64-WITHFP-NEXT: srli a2, a1, 32 ; RV64-WITHFP-NEXT: sw a1, -24(s0) ; RV64-WITHFP-NEXT: sw a2, -20(s0) @@ -176,10 +176,10 @@ define iXLen @va1_va_arg(ptr %fmt, ...) 
nounwind { ; RV32-NEXT: sw a2, 24(sp) ; RV32-NEXT: sw a3, 28(sp) ; RV32-NEXT: sw a4, 32(sp) +; RV32-NEXT: addi a0, sp, 20 ; RV32-NEXT: sw a5, 36(sp) ; RV32-NEXT: sw a6, 40(sp) ; RV32-NEXT: sw a7, 44(sp) -; RV32-NEXT: addi a0, sp, 20 ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: lw a0, 12(sp) ; RV32-NEXT: addi a0, a0, 3 @@ -197,10 +197,10 @@ define iXLen @va1_va_arg(ptr %fmt, ...) nounwind { ; RV64-NEXT: sd a2, 32(sp) ; RV64-NEXT: sd a3, 40(sp) ; RV64-NEXT: sd a4, 48(sp) +; RV64-NEXT: addi a0, sp, 24 ; RV64-NEXT: sd a5, 56(sp) ; RV64-NEXT: sd a6, 64(sp) ; RV64-NEXT: sd a7, 72(sp) -; RV64-NEXT: addi a0, sp, 24 ; RV64-NEXT: sd a0, 8(sp) ; RV64-NEXT: ld a0, 8(sp) ; RV64-NEXT: addi a0, a0, 7 @@ -221,10 +221,10 @@ define iXLen @va1_va_arg(ptr %fmt, ...) nounwind { ; RV32-WITHFP-NEXT: sw a2, 8(s0) ; RV32-WITHFP-NEXT: sw a3, 12(s0) ; RV32-WITHFP-NEXT: sw a4, 16(s0) +; RV32-WITHFP-NEXT: addi a0, s0, 4 ; RV32-WITHFP-NEXT: sw a5, 20(s0) ; RV32-WITHFP-NEXT: sw a6, 24(s0) ; RV32-WITHFP-NEXT: sw a7, 28(s0) -; RV32-WITHFP-NEXT: addi a0, s0, 4 ; RV32-WITHFP-NEXT: sw a0, -12(s0) ; RV32-WITHFP-NEXT: lw a0, -12(s0) ; RV32-WITHFP-NEXT: addi a0, a0, 3 @@ -247,10 +247,10 @@ define iXLen @va1_va_arg(ptr %fmt, ...) nounwind { ; RV64-WITHFP-NEXT: sd a2, 16(s0) ; RV64-WITHFP-NEXT: sd a3, 24(s0) ; RV64-WITHFP-NEXT: sd a4, 32(s0) +; RV64-WITHFP-NEXT: addi a0, s0, 8 ; RV64-WITHFP-NEXT: sd a5, 40(s0) ; RV64-WITHFP-NEXT: sd a6, 48(s0) ; RV64-WITHFP-NEXT: sd a7, 56(s0) -; RV64-WITHFP-NEXT: addi a0, s0, 8 ; RV64-WITHFP-NEXT: sd a0, -24(s0) ; RV64-WITHFP-NEXT: ld a0, -24(s0) ; RV64-WITHFP-NEXT: addi a0, a0, 7 @@ -283,10 +283,10 @@ define iXLen @va1_va_arg_alloca(ptr %fmt, ...) 
nounwind { ; RV32-NEXT: sw a2, 8(s0) ; RV32-NEXT: sw a3, 12(s0) ; RV32-NEXT: sw a4, 16(s0) +; RV32-NEXT: addi a0, s0, 4 ; RV32-NEXT: sw a5, 20(s0) ; RV32-NEXT: sw a6, 24(s0) ; RV32-NEXT: sw a7, 28(s0) -; RV32-NEXT: addi a0, s0, 4 ; RV32-NEXT: sw a0, -16(s0) ; RV32-NEXT: lw a0, -16(s0) ; RV32-NEXT: addi a0, a0, 3 @@ -318,10 +318,10 @@ define iXLen @va1_va_arg_alloca(ptr %fmt, ...) nounwind { ; RV64-NEXT: sd a2, 16(s0) ; RV64-NEXT: sd a3, 24(s0) ; RV64-NEXT: sd a4, 32(s0) +; RV64-NEXT: addi a0, s0, 8 ; RV64-NEXT: sd a5, 40(s0) ; RV64-NEXT: sd a6, 48(s0) ; RV64-NEXT: sd a7, 56(s0) -; RV64-NEXT: addi a0, s0, 8 ; RV64-NEXT: sd a0, -32(s0) ; RV64-NEXT: ld a0, -32(s0) ; RV64-NEXT: addi a0, a0, 7 @@ -353,10 +353,10 @@ define iXLen @va1_va_arg_alloca(ptr %fmt, ...) nounwind { ; RV32-WITHFP-NEXT: sw a2, 8(s0) ; RV32-WITHFP-NEXT: sw a3, 12(s0) ; RV32-WITHFP-NEXT: sw a4, 16(s0) +; RV32-WITHFP-NEXT: addi a0, s0, 4 ; RV32-WITHFP-NEXT: sw a5, 20(s0) ; RV32-WITHFP-NEXT: sw a6, 24(s0) ; RV32-WITHFP-NEXT: sw a7, 28(s0) -; RV32-WITHFP-NEXT: addi a0, s0, 4 ; RV32-WITHFP-NEXT: sw a0, -16(s0) ; RV32-WITHFP-NEXT: lw a0, -16(s0) ; RV32-WITHFP-NEXT: addi a0, a0, 3 @@ -388,10 +388,10 @@ define iXLen @va1_va_arg_alloca(ptr %fmt, ...) nounwind { ; RV64-WITHFP-NEXT: sd a2, 16(s0) ; RV64-WITHFP-NEXT: sd a3, 24(s0) ; RV64-WITHFP-NEXT: sd a4, 32(s0) +; RV64-WITHFP-NEXT: addi a0, s0, 8 ; RV64-WITHFP-NEXT: sd a5, 40(s0) ; RV64-WITHFP-NEXT: sd a6, 48(s0) ; RV64-WITHFP-NEXT: sd a7, 56(s0) -; RV64-WITHFP-NEXT: addi a0, s0, 8 ; RV64-WITHFP-NEXT: sd a0, -32(s0) ; RV64-WITHFP-NEXT: ld a0, -32(s0) ; RV64-WITHFP-NEXT: addi a0, a0, 7 @@ -513,13 +513,13 @@ define i64 @va2(ptr %fmt, ...) 
nounwind { ; ILP32-LABEL: va2: ; ILP32: # %bb.0: ; ILP32-NEXT: addi sp, sp, -48 +; ILP32-NEXT: addi a0, sp, 20 +; ILP32-NEXT: sw a0, 12(sp) +; ILP32-NEXT: lw a0, 12(sp) ; ILP32-NEXT: sw a1, 20(sp) ; ILP32-NEXT: sw a2, 24(sp) ; ILP32-NEXT: sw a3, 28(sp) ; ILP32-NEXT: sw a4, 32(sp) -; ILP32-NEXT: addi a0, sp, 20 -; ILP32-NEXT: sw a0, 12(sp) -; ILP32-NEXT: lw a0, 12(sp) ; ILP32-NEXT: sw a5, 36(sp) ; ILP32-NEXT: sw a6, 40(sp) ; ILP32-NEXT: sw a7, 44(sp) @@ -535,17 +535,17 @@ define i64 @va2(ptr %fmt, ...) nounwind { ; RV32D-ILP32-LABEL: va2: ; RV32D-ILP32: # %bb.0: ; RV32D-ILP32-NEXT: addi sp, sp, -48 +; RV32D-ILP32-NEXT: addi a0, sp, 20 +; RV32D-ILP32-NEXT: sw a0, 12(sp) +; RV32D-ILP32-NEXT: lw a0, 12(sp) ; RV32D-ILP32-NEXT: sw a1, 20(sp) ; RV32D-ILP32-NEXT: sw a2, 24(sp) ; RV32D-ILP32-NEXT: sw a3, 28(sp) ; RV32D-ILP32-NEXT: sw a4, 32(sp) -; RV32D-ILP32-NEXT: addi a0, sp, 20 -; RV32D-ILP32-NEXT: sw a0, 12(sp) -; RV32D-ILP32-NEXT: lw a0, 12(sp) +; RV32D-ILP32-NEXT: addi a1, a0, 7 ; RV32D-ILP32-NEXT: sw a5, 36(sp) ; RV32D-ILP32-NEXT: sw a6, 40(sp) ; RV32D-ILP32-NEXT: sw a7, 44(sp) -; RV32D-ILP32-NEXT: addi a1, a0, 7 ; RV32D-ILP32-NEXT: andi a1, a1, -8 ; RV32D-ILP32-NEXT: fld fa5, 0(a1) ; RV32D-ILP32-NEXT: addi a0, a0, 15 @@ -559,17 +559,17 @@ define i64 @va2(ptr %fmt, ...) 
nounwind { ; RV32D-ILP32F-LABEL: va2: ; RV32D-ILP32F: # %bb.0: ; RV32D-ILP32F-NEXT: addi sp, sp, -48 +; RV32D-ILP32F-NEXT: addi a0, sp, 20 +; RV32D-ILP32F-NEXT: sw a0, 12(sp) +; RV32D-ILP32F-NEXT: lw a0, 12(sp) ; RV32D-ILP32F-NEXT: sw a1, 20(sp) ; RV32D-ILP32F-NEXT: sw a2, 24(sp) ; RV32D-ILP32F-NEXT: sw a3, 28(sp) ; RV32D-ILP32F-NEXT: sw a4, 32(sp) -; RV32D-ILP32F-NEXT: addi a0, sp, 20 -; RV32D-ILP32F-NEXT: sw a0, 12(sp) -; RV32D-ILP32F-NEXT: lw a0, 12(sp) +; RV32D-ILP32F-NEXT: addi a1, a0, 7 ; RV32D-ILP32F-NEXT: sw a5, 36(sp) ; RV32D-ILP32F-NEXT: sw a6, 40(sp) ; RV32D-ILP32F-NEXT: sw a7, 44(sp) -; RV32D-ILP32F-NEXT: addi a1, a0, 7 ; RV32D-ILP32F-NEXT: andi a1, a1, -8 ; RV32D-ILP32F-NEXT: fld fa5, 0(a1) ; RV32D-ILP32F-NEXT: addi a0, a0, 15 @@ -583,17 +583,17 @@ define i64 @va2(ptr %fmt, ...) nounwind { ; RV32D-ILP32D-LABEL: va2: ; RV32D-ILP32D: # %bb.0: ; RV32D-ILP32D-NEXT: addi sp, sp, -48 +; RV32D-ILP32D-NEXT: addi a0, sp, 20 +; RV32D-ILP32D-NEXT: sw a0, 12(sp) +; RV32D-ILP32D-NEXT: lw a0, 12(sp) ; RV32D-ILP32D-NEXT: sw a1, 20(sp) ; RV32D-ILP32D-NEXT: sw a2, 24(sp) ; RV32D-ILP32D-NEXT: sw a3, 28(sp) ; RV32D-ILP32D-NEXT: sw a4, 32(sp) -; RV32D-ILP32D-NEXT: addi a0, sp, 20 -; RV32D-ILP32D-NEXT: sw a0, 12(sp) -; RV32D-ILP32D-NEXT: lw a0, 12(sp) +; RV32D-ILP32D-NEXT: addi a1, a0, 7 ; RV32D-ILP32D-NEXT: sw a5, 36(sp) ; RV32D-ILP32D-NEXT: sw a6, 40(sp) ; RV32D-ILP32D-NEXT: sw a7, 44(sp) -; RV32D-ILP32D-NEXT: addi a1, a0, 7 ; RV32D-ILP32D-NEXT: andi a1, a1, -8 ; RV32D-ILP32D-NEXT: fld fa5, 0(a1) ; RV32D-ILP32D-NEXT: addi a0, a0, 15 @@ -607,13 +607,13 @@ define i64 @va2(ptr %fmt, ...) 
nounwind { ; RV64-LABEL: va2: ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -80 +; RV64-NEXT: addi a0, sp, 24 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: ld a0, 8(sp) ; RV64-NEXT: sd a1, 24(sp) ; RV64-NEXT: sd a2, 32(sp) ; RV64-NEXT: sd a3, 40(sp) ; RV64-NEXT: sd a4, 48(sp) -; RV64-NEXT: addi a0, sp, 24 -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: ld a0, 8(sp) ; RV64-NEXT: sd a5, 56(sp) ; RV64-NEXT: sd a6, 64(sp) ; RV64-NEXT: sd a7, 72(sp) @@ -631,13 +631,13 @@ define i64 @va2(ptr %fmt, ...) nounwind { ; RV32-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32-WITHFP-NEXT: addi s0, sp, 16 +; RV32-WITHFP-NEXT: addi a0, s0, 4 +; RV32-WITHFP-NEXT: sw a0, -12(s0) +; RV32-WITHFP-NEXT: lw a0, -12(s0) ; RV32-WITHFP-NEXT: sw a1, 4(s0) ; RV32-WITHFP-NEXT: sw a2, 8(s0) ; RV32-WITHFP-NEXT: sw a3, 12(s0) ; RV32-WITHFP-NEXT: sw a4, 16(s0) -; RV32-WITHFP-NEXT: addi a0, s0, 4 -; RV32-WITHFP-NEXT: sw a0, -12(s0) -; RV32-WITHFP-NEXT: lw a0, -12(s0) ; RV32-WITHFP-NEXT: sw a5, 20(s0) ; RV32-WITHFP-NEXT: sw a6, 24(s0) ; RV32-WITHFP-NEXT: sw a7, 28(s0) @@ -658,13 +658,13 @@ define i64 @va2(ptr %fmt, ...) nounwind { ; RV64-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64-WITHFP-NEXT: addi s0, sp, 32 +; RV64-WITHFP-NEXT: addi a0, s0, 8 +; RV64-WITHFP-NEXT: sd a0, -24(s0) +; RV64-WITHFP-NEXT: ld a0, -24(s0) ; RV64-WITHFP-NEXT: sd a1, 8(s0) ; RV64-WITHFP-NEXT: sd a2, 16(s0) ; RV64-WITHFP-NEXT: sd a3, 24(s0) ; RV64-WITHFP-NEXT: sd a4, 32(s0) -; RV64-WITHFP-NEXT: addi a0, s0, 8 -; RV64-WITHFP-NEXT: sd a0, -24(s0) -; RV64-WITHFP-NEXT: ld a0, -24(s0) ; RV64-WITHFP-NEXT: sd a5, 40(s0) ; RV64-WITHFP-NEXT: sd a6, 48(s0) ; RV64-WITHFP-NEXT: sd a7, 56(s0) @@ -704,10 +704,10 @@ define iXLen @va2_va_arg(ptr %fmt, ...) 
nounwind { ; RV32-NEXT: sw a2, 24(sp) ; RV32-NEXT: sw a3, 28(sp) ; RV32-NEXT: sw a4, 32(sp) +; RV32-NEXT: addi a0, sp, 20 ; RV32-NEXT: sw a5, 36(sp) ; RV32-NEXT: sw a6, 40(sp) ; RV32-NEXT: sw a7, 44(sp) -; RV32-NEXT: addi a0, sp, 20 ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: lw a0, 12(sp) ; RV32-NEXT: addi a0, a0, 3 @@ -725,10 +725,10 @@ define iXLen @va2_va_arg(ptr %fmt, ...) nounwind { ; RV64-NEXT: sd a2, 32(sp) ; RV64-NEXT: sd a3, 40(sp) ; RV64-NEXT: sd a4, 48(sp) +; RV64-NEXT: addi a0, sp, 24 ; RV64-NEXT: sd a5, 56(sp) ; RV64-NEXT: sd a6, 64(sp) ; RV64-NEXT: sd a7, 72(sp) -; RV64-NEXT: addi a0, sp, 24 ; RV64-NEXT: sd a0, 8(sp) ; RV64-NEXT: ld a0, 8(sp) ; RV64-NEXT: addi a0, a0, 7 @@ -749,10 +749,10 @@ define iXLen @va2_va_arg(ptr %fmt, ...) nounwind { ; RV32-WITHFP-NEXT: sw a2, 8(s0) ; RV32-WITHFP-NEXT: sw a3, 12(s0) ; RV32-WITHFP-NEXT: sw a4, 16(s0) +; RV32-WITHFP-NEXT: addi a0, s0, 4 ; RV32-WITHFP-NEXT: sw a5, 20(s0) ; RV32-WITHFP-NEXT: sw a6, 24(s0) ; RV32-WITHFP-NEXT: sw a7, 28(s0) -; RV32-WITHFP-NEXT: addi a0, s0, 4 ; RV32-WITHFP-NEXT: sw a0, -12(s0) ; RV32-WITHFP-NEXT: lw a0, -12(s0) ; RV32-WITHFP-NEXT: addi a0, a0, 3 @@ -775,10 +775,10 @@ define iXLen @va2_va_arg(ptr %fmt, ...) nounwind { ; RV64-WITHFP-NEXT: sd a2, 16(s0) ; RV64-WITHFP-NEXT: sd a3, 24(s0) ; RV64-WITHFP-NEXT: sd a4, 32(s0) +; RV64-WITHFP-NEXT: addi a0, s0, 8 ; RV64-WITHFP-NEXT: sd a5, 40(s0) ; RV64-WITHFP-NEXT: sd a6, 48(s0) ; RV64-WITHFP-NEXT: sd a7, 56(s0) -; RV64-WITHFP-NEXT: addi a0, s0, 8 ; RV64-WITHFP-NEXT: sd a0, -24(s0) ; RV64-WITHFP-NEXT: ld a0, -24(s0) ; RV64-WITHFP-NEXT: addi a0, a0, 7 @@ -885,8 +885,8 @@ define i64 @va3(i32 %a, i64 %b, ...) 
nounwind { ; RV32D-ILP32-NEXT: sw a4, 32(sp) ; RV32D-ILP32-NEXT: sw a5, 36(sp) ; RV32D-ILP32-NEXT: sw a6, 40(sp) -; RV32D-ILP32-NEXT: sw a7, 44(sp) ; RV32D-ILP32-NEXT: addi a3, a0, 7 +; RV32D-ILP32-NEXT: sw a7, 44(sp) ; RV32D-ILP32-NEXT: andi a3, a3, -8 ; RV32D-ILP32-NEXT: fld fa5, 0(a3) ; RV32D-ILP32-NEXT: addi a0, a0, 15 @@ -911,8 +911,8 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; RV32D-ILP32F-NEXT: sw a4, 32(sp) ; RV32D-ILP32F-NEXT: sw a5, 36(sp) ; RV32D-ILP32F-NEXT: sw a6, 40(sp) -; RV32D-ILP32F-NEXT: sw a7, 44(sp) ; RV32D-ILP32F-NEXT: addi a3, a0, 7 +; RV32D-ILP32F-NEXT: sw a7, 44(sp) ; RV32D-ILP32F-NEXT: andi a3, a3, -8 ; RV32D-ILP32F-NEXT: fld fa5, 0(a3) ; RV32D-ILP32F-NEXT: addi a0, a0, 15 @@ -937,8 +937,8 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; RV32D-ILP32D-NEXT: sw a4, 32(sp) ; RV32D-ILP32D-NEXT: sw a5, 36(sp) ; RV32D-ILP32D-NEXT: sw a6, 40(sp) -; RV32D-ILP32D-NEXT: sw a7, 44(sp) ; RV32D-ILP32D-NEXT: addi a3, a0, 7 +; RV32D-ILP32D-NEXT: sw a7, 44(sp) ; RV32D-ILP32D-NEXT: andi a3, a3, -8 ; RV32D-ILP32D-NEXT: fld fa5, 0(a3) ; RV32D-ILP32D-NEXT: addi a0, a0, 15 @@ -1056,9 +1056,9 @@ define iXLen @va3_va_arg(iXLen %a, iXLen %b, ...) nounwind { ; RV32-NEXT: sw a3, 12(sp) ; RV32-NEXT: sw a4, 16(sp) ; RV32-NEXT: sw a5, 20(sp) +; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: sw a6, 24(sp) ; RV32-NEXT: sw a7, 28(sp) -; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: sw a0, 4(sp) ; RV32-NEXT: lw a0, 4(sp) ; RV32-NEXT: addi a0, a0, 3 @@ -1077,9 +1077,9 @@ define iXLen @va3_va_arg(iXLen %a, iXLen %b, ...) nounwind { ; RV64-NEXT: sd a3, 24(sp) ; RV64-NEXT: sd a4, 32(sp) ; RV64-NEXT: sd a5, 40(sp) +; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: sd a6, 48(sp) ; RV64-NEXT: sd a7, 56(sp) -; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: sd a0, 8(sp) ; RV64-NEXT: ld a0, 8(sp) ; RV64-NEXT: addi a0, a0, 7 @@ -1101,9 +1101,9 @@ define iXLen @va3_va_arg(iXLen %a, iXLen %b, ...) 
nounwind { ; RV32-WITHFP-NEXT: sw a3, 4(s0) ; RV32-WITHFP-NEXT: sw a4, 8(s0) ; RV32-WITHFP-NEXT: sw a5, 12(s0) +; RV32-WITHFP-NEXT: mv a0, s0 ; RV32-WITHFP-NEXT: sw a6, 16(s0) ; RV32-WITHFP-NEXT: sw a7, 20(s0) -; RV32-WITHFP-NEXT: mv a0, s0 ; RV32-WITHFP-NEXT: sw a0, -12(s0) ; RV32-WITHFP-NEXT: lw a0, -12(s0) ; RV32-WITHFP-NEXT: addi a0, a0, 3 @@ -1127,9 +1127,9 @@ define iXLen @va3_va_arg(iXLen %a, iXLen %b, ...) nounwind { ; RV64-WITHFP-NEXT: sd a3, 8(s0) ; RV64-WITHFP-NEXT: sd a4, 16(s0) ; RV64-WITHFP-NEXT: sd a5, 24(s0) +; RV64-WITHFP-NEXT: mv a0, s0 ; RV64-WITHFP-NEXT: sd a6, 32(s0) ; RV64-WITHFP-NEXT: sd a7, 40(s0) -; RV64-WITHFP-NEXT: mv a0, s0 ; RV64-WITHFP-NEXT: sd a0, -24(s0) ; RV64-WITHFP-NEXT: ld a0, -24(s0) ; RV64-WITHFP-NEXT: addi a0, a0, 7 @@ -1169,9 +1169,9 @@ define void @va3_caller() nounwind { ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: lui a1, 5 +; RV64-NEXT: lui a2, 5 +; RV64-NEXT: addi a2, a2, -480 ; RV64-NEXT: li a0, 2 -; RV64-NEXT: addi a2, a1, -480 ; RV64-NEXT: li a1, 1111 ; RV64-NEXT: call va3 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -1201,9 +1201,9 @@ define void @va3_caller() nounwind { ; RV64-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill ; RV64-WITHFP-NEXT: addi s0, sp, 16 -; RV64-WITHFP-NEXT: lui a1, 5 +; RV64-WITHFP-NEXT: lui a2, 5 +; RV64-WITHFP-NEXT: addi a2, a2, -480 ; RV64-WITHFP-NEXT: li a0, 2 -; RV64-WITHFP-NEXT: addi a2, a1, -480 ; RV64-WITHFP-NEXT: li a1, 1111 ; RV64-WITHFP-NEXT: call va3 ; RV64-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -1227,10 +1227,10 @@ define iXLen @va4_va_copy(i32 %argno, ...) 
nounwind { ; RV32-NEXT: sw a2, 40(sp) ; RV32-NEXT: sw a3, 44(sp) ; RV32-NEXT: sw a4, 48(sp) +; RV32-NEXT: addi a0, sp, 36 ; RV32-NEXT: sw a5, 52(sp) ; RV32-NEXT: sw a6, 56(sp) ; RV32-NEXT: sw a7, 60(sp) -; RV32-NEXT: addi a0, sp, 36 ; RV32-NEXT: sw a0, 16(sp) ; RV32-NEXT: lw a0, 16(sp) ; RV32-NEXT: addi a0, a0, 3 @@ -1249,16 +1249,16 @@ define iXLen @va4_va_copy(i32 %argno, ...) nounwind { ; RV32-NEXT: addi a1, a0, 4 ; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lw a1, 16(sp) -; RV32-NEXT: lw a0, 0(a0) ; RV32-NEXT: addi a1, a1, 3 ; RV32-NEXT: and a1, a1, s0 ; RV32-NEXT: addi a2, a1, 4 +; RV32-NEXT: lw a0, 0(a0) ; RV32-NEXT: sw a2, 16(sp) ; RV32-NEXT: lw a2, 16(sp) -; RV32-NEXT: lw a1, 0(a1) ; RV32-NEXT: addi a2, a2, 3 ; RV32-NEXT: andi a2, a2, -4 ; RV32-NEXT: addi a3, a2, 4 +; RV32-NEXT: lw a1, 0(a1) ; RV32-NEXT: sw a3, 16(sp) ; RV32-NEXT: lw a2, 0(a2) ; RV32-NEXT: add a0, a0, s1 @@ -1280,10 +1280,10 @@ define iXLen @va4_va_copy(i32 %argno, ...) nounwind { ; RV64-NEXT: sd a2, 64(sp) ; RV64-NEXT: sd a3, 72(sp) ; RV64-NEXT: sd a4, 80(sp) +; RV64-NEXT: addi a0, sp, 56 ; RV64-NEXT: sd a5, 88(sp) ; RV64-NEXT: sd a6, 96(sp) ; RV64-NEXT: sd a7, 104(sp) -; RV64-NEXT: addi a0, sp, 56 ; RV64-NEXT: sd a0, 16(sp) ; RV64-NEXT: ld a0, 16(sp) ; RV64-NEXT: addi a0, a0, 7 @@ -1305,16 +1305,16 @@ define iXLen @va4_va_copy(i32 %argno, ...) nounwind { ; RV64-NEXT: addi a1, a0, 8 ; RV64-NEXT: sd a1, 16(sp) ; RV64-NEXT: ld a1, 16(sp) -; RV64-NEXT: ld a0, 0(a0) ; RV64-NEXT: addi a1, a1, 7 ; RV64-NEXT: and a1, a1, s0 ; RV64-NEXT: addi a2, a1, 8 +; RV64-NEXT: ld a0, 0(a0) ; RV64-NEXT: sd a2, 16(sp) ; RV64-NEXT: ld a2, 16(sp) -; RV64-NEXT: ld a1, 0(a1) ; RV64-NEXT: addi a2, a2, 7 ; RV64-NEXT: andi a2, a2, -8 ; RV64-NEXT: addi a3, a2, 8 +; RV64-NEXT: ld a1, 0(a1) ; RV64-NEXT: sd a3, 16(sp) ; RV64-NEXT: ld a2, 0(a2) ; RV64-NEXT: add a0, a0, s1 @@ -1338,10 +1338,10 @@ define iXLen @va4_va_copy(i32 %argno, ...) 
nounwind { ; RV32-WITHFP-NEXT: sw a2, 8(s0) ; RV32-WITHFP-NEXT: sw a3, 12(s0) ; RV32-WITHFP-NEXT: sw a4, 16(s0) +; RV32-WITHFP-NEXT: addi a0, s0, 4 ; RV32-WITHFP-NEXT: sw a5, 20(s0) ; RV32-WITHFP-NEXT: sw a6, 24(s0) ; RV32-WITHFP-NEXT: sw a7, 28(s0) -; RV32-WITHFP-NEXT: addi a0, s0, 4 ; RV32-WITHFP-NEXT: sw a0, -20(s0) ; RV32-WITHFP-NEXT: lw a0, -20(s0) ; RV32-WITHFP-NEXT: addi a0, a0, 3 @@ -1360,16 +1360,16 @@ define iXLen @va4_va_copy(i32 %argno, ...) nounwind { ; RV32-WITHFP-NEXT: addi a1, a0, 4 ; RV32-WITHFP-NEXT: sw a1, -20(s0) ; RV32-WITHFP-NEXT: lw a1, -20(s0) -; RV32-WITHFP-NEXT: lw a0, 0(a0) ; RV32-WITHFP-NEXT: addi a1, a1, 3 ; RV32-WITHFP-NEXT: and a1, a1, s1 ; RV32-WITHFP-NEXT: addi a2, a1, 4 +; RV32-WITHFP-NEXT: lw a0, 0(a0) ; RV32-WITHFP-NEXT: sw a2, -20(s0) ; RV32-WITHFP-NEXT: lw a2, -20(s0) -; RV32-WITHFP-NEXT: lw a1, 0(a1) ; RV32-WITHFP-NEXT: addi a2, a2, 3 ; RV32-WITHFP-NEXT: andi a2, a2, -4 ; RV32-WITHFP-NEXT: addi a3, a2, 4 +; RV32-WITHFP-NEXT: lw a1, 0(a1) ; RV32-WITHFP-NEXT: sw a3, -20(s0) ; RV32-WITHFP-NEXT: lw a2, 0(a2) ; RV32-WITHFP-NEXT: add a0, a0, s2 @@ -1394,10 +1394,10 @@ define iXLen @va4_va_copy(i32 %argno, ...) nounwind { ; RV64-WITHFP-NEXT: sd a2, 16(s0) ; RV64-WITHFP-NEXT: sd a3, 24(s0) ; RV64-WITHFP-NEXT: sd a4, 32(s0) +; RV64-WITHFP-NEXT: addi a0, s0, 8 ; RV64-WITHFP-NEXT: sd a5, 40(s0) ; RV64-WITHFP-NEXT: sd a6, 48(s0) ; RV64-WITHFP-NEXT: sd a7, 56(s0) -; RV64-WITHFP-NEXT: addi a0, s0, 8 ; RV64-WITHFP-NEXT: sd a0, -40(s0) ; RV64-WITHFP-NEXT: ld a0, -40(s0) ; RV64-WITHFP-NEXT: addi a0, a0, 7 @@ -1419,16 +1419,16 @@ define iXLen @va4_va_copy(i32 %argno, ...) 
nounwind { ; RV64-WITHFP-NEXT: addi a1, a0, 8 ; RV64-WITHFP-NEXT: sd a1, -40(s0) ; RV64-WITHFP-NEXT: ld a1, -40(s0) -; RV64-WITHFP-NEXT: ld a0, 0(a0) ; RV64-WITHFP-NEXT: addi a1, a1, 7 ; RV64-WITHFP-NEXT: and a1, a1, s1 ; RV64-WITHFP-NEXT: addi a2, a1, 8 +; RV64-WITHFP-NEXT: ld a0, 0(a0) ; RV64-WITHFP-NEXT: sd a2, -40(s0) ; RV64-WITHFP-NEXT: ld a2, -40(s0) -; RV64-WITHFP-NEXT: ld a1, 0(a1) ; RV64-WITHFP-NEXT: addi a2, a2, 7 ; RV64-WITHFP-NEXT: andi a2, a2, -8 ; RV64-WITHFP-NEXT: addi a3, a2, 8 +; RV64-WITHFP-NEXT: ld a1, 0(a1) ; RV64-WITHFP-NEXT: sd a3, -40(s0) ; RV64-WITHFP-NEXT: ld a2, 0(a2) ; RV64-WITHFP-NEXT: add a0, a0, s2 @@ -1474,11 +1474,11 @@ define iXLen @va6_no_fixed_args(...) nounwind { ; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: sw a2, 24(sp) ; RV32-NEXT: sw a3, 28(sp) +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: sw a4, 32(sp) ; RV32-NEXT: sw a5, 36(sp) ; RV32-NEXT: sw a6, 40(sp) ; RV32-NEXT: sw a7, 44(sp) -; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: lw a0, 12(sp) ; RV32-NEXT: addi a0, a0, 3 @@ -1496,11 +1496,11 @@ define iXLen @va6_no_fixed_args(...) nounwind { ; RV64-NEXT: sd a1, 24(sp) ; RV64-NEXT: sd a2, 32(sp) ; RV64-NEXT: sd a3, 40(sp) +; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: sd a4, 48(sp) ; RV64-NEXT: sd a5, 56(sp) ; RV64-NEXT: sd a6, 64(sp) ; RV64-NEXT: sd a7, 72(sp) -; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: sd a0, 8(sp) ; RV64-NEXT: ld a0, 8(sp) ; RV64-NEXT: addi a0, a0, 7 @@ -1521,11 +1521,11 @@ define iXLen @va6_no_fixed_args(...) nounwind { ; RV32-WITHFP-NEXT: sw a1, 4(s0) ; RV32-WITHFP-NEXT: sw a2, 8(s0) ; RV32-WITHFP-NEXT: sw a3, 12(s0) +; RV32-WITHFP-NEXT: mv a0, s0 ; RV32-WITHFP-NEXT: sw a4, 16(s0) ; RV32-WITHFP-NEXT: sw a5, 20(s0) ; RV32-WITHFP-NEXT: sw a6, 24(s0) ; RV32-WITHFP-NEXT: sw a7, 28(s0) -; RV32-WITHFP-NEXT: mv a0, s0 ; RV32-WITHFP-NEXT: sw a0, -12(s0) ; RV32-WITHFP-NEXT: lw a0, -12(s0) ; RV32-WITHFP-NEXT: addi a0, a0, 3 @@ -1548,11 +1548,11 @@ define iXLen @va6_no_fixed_args(...) 
nounwind { ; RV64-WITHFP-NEXT: sd a1, 8(s0) ; RV64-WITHFP-NEXT: sd a2, 16(s0) ; RV64-WITHFP-NEXT: sd a3, 24(s0) +; RV64-WITHFP-NEXT: mv a0, s0 ; RV64-WITHFP-NEXT: sd a4, 32(s0) ; RV64-WITHFP-NEXT: sd a5, 40(s0) ; RV64-WITHFP-NEXT: sd a6, 48(s0) ; RV64-WITHFP-NEXT: sd a7, 56(s0) -; RV64-WITHFP-NEXT: mv a0, s0 ; RV64-WITHFP-NEXT: sd a0, -24(s0) ; RV64-WITHFP-NEXT: ld a0, -24(s0) ; RV64-WITHFP-NEXT: addi a0, a0, 7 @@ -1581,32 +1581,32 @@ define i32 @va_large_stack(ptr %fmt, ...) { ; RV32-NEXT: sub sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa_offset 100000048 ; RV32-NEXT: lui a0, 24414 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: sw a1, 276(a0) -; RV32-NEXT: lui a0, 24414 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: sw a2, 280(a0) -; RV32-NEXT: lui a0, 24414 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: sw a3, 284(a0) -; RV32-NEXT: lui a0, 24414 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: sw a4, 288(a0) -; RV32-NEXT: lui a0, 24414 ; RV32-NEXT: addi a0, a0, 276 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: lui t0, 24414 +; RV32-NEXT: add t0, sp, t0 +; RV32-NEXT: sw a1, 276(t0) ; RV32-NEXT: lui a1, 24414 ; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: sw a5, 292(a1) +; RV32-NEXT: sw a2, 280(a1) ; RV32-NEXT: lui a1, 24414 ; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: sw a6, 296(a1) +; RV32-NEXT: sw a3, 284(a1) ; RV32-NEXT: lui a1, 24414 ; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: sw a7, 300(a1) +; RV32-NEXT: sw a4, 288(a1) ; RV32-NEXT: addi a1, a0, 4 +; RV32-NEXT: lui a2, 24414 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a5, 292(a2) +; RV32-NEXT: lui a2, 24414 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a6, 296(a2) +; RV32-NEXT: lui a2, 24414 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: sw a7, 300(a2) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lw a0, 0(a0) ; RV32-NEXT: lui a1, 24414 @@ -1622,24 +1622,27 @@ define i32 @va_large_stack(ptr %fmt, ...) 
{ ; RV64-NEXT: sub sp, sp, a0 ; RV64-NEXT: .cfi_def_cfa_offset 100000080 ; RV64-NEXT: lui a0, 24414 +; RV64-NEXT: addi a0, a0, 280 ; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: sd a1, 280(a0) -; RV64-NEXT: lui a0, 24414 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: sd a2, 288(a0) -; RV64-NEXT: lui a0, 24414 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: sd a3, 296(a0) -; RV64-NEXT: lui a0, 24414 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: sd a4, 304(a0) -; RV64-NEXT: addi a0, sp, 8 +; RV64-NEXT: addi t0, sp, 8 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lw a0, 4(t0) +; RV64-NEXT: lwu t0, 8(sp) +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: or a0, a0, t0 +; RV64-NEXT: lui t0, 24414 +; RV64-NEXT: add t0, sp, t0 +; RV64-NEXT: sd a1, 280(t0) ; RV64-NEXT: lui a1, 24414 -; RV64-NEXT: addi a1, a1, 280 ; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: sd a1, 8(sp) -; RV64-NEXT: lw a0, 4(a0) -; RV64-NEXT: lwu a1, 8(sp) +; RV64-NEXT: sd a2, 288(a1) +; RV64-NEXT: lui a1, 24414 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: sd a3, 296(a1) +; RV64-NEXT: lui a1, 24414 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: sd a4, 304(a1) +; RV64-NEXT: addi a1, a0, 4 ; RV64-NEXT: lui a2, 24414 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: sd a5, 312(a2) @@ -1649,9 +1652,6 @@ define i32 @va_large_stack(ptr %fmt, ...) { ; RV64-NEXT: lui a2, 24414 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: sd a7, 328(a2) -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: addi a1, a0, 4 ; RV64-NEXT: srli a2, a1, 32 ; RV64-NEXT: sw a1, 8(sp) ; RV64-NEXT: sw a2, 12(sp) @@ -1678,19 +1678,19 @@ define i32 @va_large_stack(ptr %fmt, ...) 
{ ; RV32-WITHFP-NEXT: lui a0, 24414 ; RV32-WITHFP-NEXT: addi a0, a0, 272 ; RV32-WITHFP-NEXT: sub a0, s0, a0 +; RV32-WITHFP-NEXT: addi t0, s0, 4 +; RV32-WITHFP-NEXT: sw t0, 0(a0) +; RV32-WITHFP-NEXT: lw t0, 0(a0) ; RV32-WITHFP-NEXT: sw a1, 4(s0) ; RV32-WITHFP-NEXT: sw a2, 8(s0) ; RV32-WITHFP-NEXT: sw a3, 12(s0) ; RV32-WITHFP-NEXT: sw a4, 16(s0) -; RV32-WITHFP-NEXT: addi a1, s0, 4 -; RV32-WITHFP-NEXT: sw a1, 0(a0) -; RV32-WITHFP-NEXT: lw a1, 0(a0) +; RV32-WITHFP-NEXT: addi a1, t0, 4 ; RV32-WITHFP-NEXT: sw a5, 20(s0) ; RV32-WITHFP-NEXT: sw a6, 24(s0) ; RV32-WITHFP-NEXT: sw a7, 28(s0) -; RV32-WITHFP-NEXT: addi a2, a1, 4 -; RV32-WITHFP-NEXT: sw a2, 0(a0) -; RV32-WITHFP-NEXT: lw a0, 0(a1) +; RV32-WITHFP-NEXT: sw a1, 0(a0) +; RV32-WITHFP-NEXT: lw a0, 0(t0) ; RV32-WITHFP-NEXT: lui a1, 24414 ; RV32-WITHFP-NEXT: addi a1, a1, -1728 ; RV32-WITHFP-NEXT: add sp, sp, a1 @@ -1719,24 +1719,24 @@ define i32 @va_large_stack(ptr %fmt, ...) { ; RV64-WITHFP-NEXT: lui a0, 24414 ; RV64-WITHFP-NEXT: addi a0, a0, 288 ; RV64-WITHFP-NEXT: sub a0, s0, a0 +; RV64-WITHFP-NEXT: addi t0, s0, 8 +; RV64-WITHFP-NEXT: sd t0, 0(a0) +; RV64-WITHFP-NEXT: lw t0, 4(a0) +; RV64-WITHFP-NEXT: lwu t1, 0(a0) +; RV64-WITHFP-NEXT: slli t0, t0, 32 +; RV64-WITHFP-NEXT: or t0, t0, t1 ; RV64-WITHFP-NEXT: sd a1, 8(s0) ; RV64-WITHFP-NEXT: sd a2, 16(s0) ; RV64-WITHFP-NEXT: sd a3, 24(s0) ; RV64-WITHFP-NEXT: sd a4, 32(s0) -; RV64-WITHFP-NEXT: addi a1, s0, 8 -; RV64-WITHFP-NEXT: sd a1, 0(a0) -; RV64-WITHFP-NEXT: lwu a1, 0(a0) -; RV64-WITHFP-NEXT: lw a2, 4(a0) +; RV64-WITHFP-NEXT: addi a1, t0, 4 ; RV64-WITHFP-NEXT: sd a5, 40(s0) ; RV64-WITHFP-NEXT: sd a6, 48(s0) ; RV64-WITHFP-NEXT: sd a7, 56(s0) -; RV64-WITHFP-NEXT: slli a2, a2, 32 -; RV64-WITHFP-NEXT: or a1, a2, a1 -; RV64-WITHFP-NEXT: addi a2, a1, 4 -; RV64-WITHFP-NEXT: srli a3, a2, 32 -; RV64-WITHFP-NEXT: sw a2, 0(a0) -; RV64-WITHFP-NEXT: sw a3, 4(a0) -; RV64-WITHFP-NEXT: lw a0, 0(a1) +; RV64-WITHFP-NEXT: srli a2, a1, 32 +; RV64-WITHFP-NEXT: sw a1, 0(a0) +; 
RV64-WITHFP-NEXT: sw a2, 4(a0) +; RV64-WITHFP-NEXT: lw a0, 0(t0) ; RV64-WITHFP-NEXT: lui a1, 24414 ; RV64-WITHFP-NEXT: addi a1, a1, -1680 ; RV64-WITHFP-NEXT: add sp, sp, a1 @@ -1865,16 +1865,17 @@ define i32 @va_printf(ptr %fmt, ...) { ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -36 +; RV32-NEXT: addi t0, sp, 20 +; RV32-NEXT: sw t0, 8(sp) +; RV32-NEXT: lw t0, 8(sp) ; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: sw a2, 24(sp) ; RV32-NEXT: sw a3, 28(sp) ; RV32-NEXT: sw a4, 32(sp) -; RV32-NEXT: addi a1, sp, 20 -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lw a1, 8(sp) ; RV32-NEXT: sw a5, 36(sp) ; RV32-NEXT: sw a6, 40(sp) ; RV32-NEXT: sw a7, 44(sp) +; RV32-NEXT: mv a1, t0 ; RV32-NEXT: call va_vprintf ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: .cfi_restore ra @@ -1888,16 +1889,17 @@ define i32 @va_printf(ptr %fmt, ...) { ; RV64-NEXT: .cfi_def_cfa_offset 80 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -72 +; RV64-NEXT: addi t0, sp, 24 +; RV64-NEXT: sd t0, 0(sp) +; RV64-NEXT: ld t0, 0(sp) ; RV64-NEXT: sd a1, 24(sp) ; RV64-NEXT: sd a2, 32(sp) ; RV64-NEXT: sd a3, 40(sp) ; RV64-NEXT: sd a4, 48(sp) -; RV64-NEXT: addi a1, sp, 24 -; RV64-NEXT: sd a1, 0(sp) -; RV64-NEXT: ld a1, 0(sp) ; RV64-NEXT: sd a5, 56(sp) ; RV64-NEXT: sd a6, 64(sp) ; RV64-NEXT: sd a7, 72(sp) +; RV64-NEXT: mv a1, t0 ; RV64-NEXT: call va_vprintf ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: .cfi_restore ra @@ -1915,16 +1917,17 @@ define i32 @va_printf(ptr %fmt, ...) 
{ ; RV32-WITHFP-NEXT: .cfi_offset s0, -40 ; RV32-WITHFP-NEXT: addi s0, sp, 16 ; RV32-WITHFP-NEXT: .cfi_def_cfa s0, 32 +; RV32-WITHFP-NEXT: addi t0, s0, 4 +; RV32-WITHFP-NEXT: sw t0, -12(s0) +; RV32-WITHFP-NEXT: lw t0, -12(s0) ; RV32-WITHFP-NEXT: sw a1, 4(s0) ; RV32-WITHFP-NEXT: sw a2, 8(s0) ; RV32-WITHFP-NEXT: sw a3, 12(s0) ; RV32-WITHFP-NEXT: sw a4, 16(s0) -; RV32-WITHFP-NEXT: addi a1, s0, 4 -; RV32-WITHFP-NEXT: sw a1, -12(s0) -; RV32-WITHFP-NEXT: lw a1, -12(s0) ; RV32-WITHFP-NEXT: sw a5, 20(s0) ; RV32-WITHFP-NEXT: sw a6, 24(s0) ; RV32-WITHFP-NEXT: sw a7, 28(s0) +; RV32-WITHFP-NEXT: mv a1, t0 ; RV32-WITHFP-NEXT: call va_vprintf ; RV32-WITHFP-NEXT: .cfi_def_cfa sp, 48 ; RV32-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -1945,16 +1948,17 @@ define i32 @va_printf(ptr %fmt, ...) { ; RV64-WITHFP-NEXT: .cfi_offset s0, -80 ; RV64-WITHFP-NEXT: addi s0, sp, 32 ; RV64-WITHFP-NEXT: .cfi_def_cfa s0, 64 +; RV64-WITHFP-NEXT: addi t0, s0, 8 +; RV64-WITHFP-NEXT: sd t0, -24(s0) +; RV64-WITHFP-NEXT: ld t0, -24(s0) ; RV64-WITHFP-NEXT: sd a1, 8(s0) ; RV64-WITHFP-NEXT: sd a2, 16(s0) ; RV64-WITHFP-NEXT: sd a3, 24(s0) ; RV64-WITHFP-NEXT: sd a4, 32(s0) -; RV64-WITHFP-NEXT: addi a1, s0, 8 -; RV64-WITHFP-NEXT: sd a1, -24(s0) -; RV64-WITHFP-NEXT: ld a1, -24(s0) ; RV64-WITHFP-NEXT: sd a5, 40(s0) ; RV64-WITHFP-NEXT: sd a6, 48(s0) ; RV64-WITHFP-NEXT: sd a7, 56(s0) +; RV64-WITHFP-NEXT: mv a1, t0 ; RV64-WITHFP-NEXT: call va_vprintf ; RV64-WITHFP-NEXT: .cfi_def_cfa sp, 96 ; RV64-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll index ca9f7637388f7..a04ca8d1d5d9d 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll @@ -7,34 +7,34 @@ define void @lshr_4bytes(ptr %src.ptr, 
ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: lbu a3, 1(a0) ; RV64I-NEXT: lbu a4, 0(a0) -; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: lbu a0, 3(a0) +; RV64I-NEXT: lbu a5, 3(a0) +; RV64I-NEXT: lbu a0, 2(a0) +; RV64I-NEXT: lbu a6, 3(a1) +; RV64I-NEXT: lbu a7, 1(a1) +; RV64I-NEXT: lbu t0, 2(a1) +; RV64I-NEXT: lbu a1, 0(a1) ; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: lbu a4, 0(a1) -; RV64I-NEXT: lbu a6, 1(a1) -; RV64I-NEXT: lbu a7, 2(a1) -; RV64I-NEXT: lbu a1, 3(a1) -; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a0, a5, a0 ; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a4, a6, a4 -; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a4, a6, t0 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a1, a7, a1 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a0, a0, 16 -; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: or a1, a4, a1 ; RV64I-NEXT: or a0, a0, a3 -; RV64I-NEXT: or a1, a1, a4 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: srlw a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 16 -; RV64I-NEXT: slli a3, a0, 48 +; RV64I-NEXT: slli a1, a0, 48 +; RV64I-NEXT: srliw a3, a0, 16 +; RV64I-NEXT: srli a1, a1, 48 +; RV64I-NEXT: srli a1, a1, 8 ; RV64I-NEXT: srliw a4, a0, 24 -; RV64I-NEXT: srli a3, a3, 48 -; RV64I-NEXT: srli a3, a3, 8 ; RV64I-NEXT: sb a0, 0(a2) -; RV64I-NEXT: sb a3, 1(a2) -; RV64I-NEXT: sb a1, 2(a2) +; RV64I-NEXT: sb a1, 1(a2) +; RV64I-NEXT: sb a3, 2(a2) ; RV64I-NEXT: sb a4, 3(a2) ; RV64I-NEXT: ret ; @@ -42,34 +42,34 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lbu a3, 1(a0) ; RV32I-NEXT: lbu a4, 0(a0) -; RV32I-NEXT: lbu a5, 2(a0) -; RV32I-NEXT: lbu a0, 3(a0) +; RV32I-NEXT: lbu a5, 3(a0) +; RV32I-NEXT: lbu a0, 2(a0) +; RV32I-NEXT: lbu a6, 3(a1) +; RV32I-NEXT: lbu a7, 1(a1) +; RV32I-NEXT: lbu t0, 2(a1) +; RV32I-NEXT: lbu a1, 0(a1) ; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: or a3, 
a3, a4 -; RV32I-NEXT: lbu a4, 0(a1) -; RV32I-NEXT: lbu a6, 1(a1) -; RV32I-NEXT: lbu a7, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a4, a6, a4 -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a1, a7 +; RV32I-NEXT: or a4, a6, t0 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a1, a7, a1 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a0, a0, 16 -; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: slli a1, a1, 3 ; RV32I-NEXT: srl a0, a0, a1 -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: slli a3, a0, 16 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srli a3, a0, 16 +; RV32I-NEXT: srli a1, a1, 16 +; RV32I-NEXT: srli a1, a1, 8 ; RV32I-NEXT: srli a4, a0, 24 -; RV32I-NEXT: srli a3, a3, 16 -; RV32I-NEXT: srli a3, a3, 8 ; RV32I-NEXT: sb a0, 0(a2) -; RV32I-NEXT: sb a3, 1(a2) -; RV32I-NEXT: sb a1, 2(a2) +; RV32I-NEXT: sb a1, 1(a2) +; RV32I-NEXT: sb a3, 2(a2) ; RV32I-NEXT: sb a4, 3(a2) ; RV32I-NEXT: ret %src = load i32, ptr %src.ptr, align 1 @@ -84,34 +84,34 @@ define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: lbu a3, 1(a0) ; RV64I-NEXT: lbu a4, 0(a0) -; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: lbu a0, 3(a0) +; RV64I-NEXT: lbu a5, 3(a0) +; RV64I-NEXT: lbu a0, 2(a0) +; RV64I-NEXT: lbu a6, 3(a1) +; RV64I-NEXT: lbu a7, 1(a1) +; RV64I-NEXT: lbu t0, 2(a1) +; RV64I-NEXT: lbu a1, 0(a1) ; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: lbu a4, 0(a1) -; RV64I-NEXT: lbu a6, 1(a1) -; RV64I-NEXT: lbu a7, 2(a1) -; RV64I-NEXT: lbu a1, 3(a1) -; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a0, a5, a0 ; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a4, a6, a4 -; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or a1, a1, a7 +; 
RV64I-NEXT: or a4, a6, t0 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a1, a7, a1 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a0, a0, 16 -; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: or a1, a4, a1 ; RV64I-NEXT: or a0, a0, a3 -; RV64I-NEXT: or a1, a1, a4 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: sllw a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 16 -; RV64I-NEXT: slli a3, a0, 48 +; RV64I-NEXT: slli a1, a0, 48 +; RV64I-NEXT: srliw a3, a0, 16 +; RV64I-NEXT: srli a1, a1, 48 +; RV64I-NEXT: srli a1, a1, 8 ; RV64I-NEXT: srliw a4, a0, 24 -; RV64I-NEXT: srli a3, a3, 48 -; RV64I-NEXT: srli a3, a3, 8 ; RV64I-NEXT: sb a0, 0(a2) -; RV64I-NEXT: sb a3, 1(a2) -; RV64I-NEXT: sb a1, 2(a2) +; RV64I-NEXT: sb a1, 1(a2) +; RV64I-NEXT: sb a3, 2(a2) ; RV64I-NEXT: sb a4, 3(a2) ; RV64I-NEXT: ret ; @@ -119,34 +119,34 @@ define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lbu a3, 1(a0) ; RV32I-NEXT: lbu a4, 0(a0) -; RV32I-NEXT: lbu a5, 2(a0) -; RV32I-NEXT: lbu a0, 3(a0) +; RV32I-NEXT: lbu a5, 3(a0) +; RV32I-NEXT: lbu a0, 2(a0) +; RV32I-NEXT: lbu a6, 3(a1) +; RV32I-NEXT: lbu a7, 1(a1) +; RV32I-NEXT: lbu t0, 2(a1) +; RV32I-NEXT: lbu a1, 0(a1) ; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: or a3, a3, a4 -; RV32I-NEXT: lbu a4, 0(a1) -; RV32I-NEXT: lbu a6, 1(a1) -; RV32I-NEXT: lbu a7, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a4, a6, a4 -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a1, a7 +; RV32I-NEXT: or a4, a6, t0 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a1, a7, a1 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a0, a0, 16 -; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: slli a1, a1, 3 ; RV32I-NEXT: sll a0, a0, a1 -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: slli a3, a0, 16 +; 
RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srli a3, a0, 16 +; RV32I-NEXT: srli a1, a1, 16 +; RV32I-NEXT: srli a1, a1, 8 ; RV32I-NEXT: srli a4, a0, 24 -; RV32I-NEXT: srli a3, a3, 16 -; RV32I-NEXT: srli a3, a3, 8 ; RV32I-NEXT: sb a0, 0(a2) -; RV32I-NEXT: sb a3, 1(a2) -; RV32I-NEXT: sb a1, 2(a2) +; RV32I-NEXT: sb a1, 1(a2) +; RV32I-NEXT: sb a3, 2(a2) ; RV32I-NEXT: sb a4, 3(a2) ; RV32I-NEXT: ret %src = load i32, ptr %src.ptr, align 1 @@ -161,34 +161,34 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: lbu a3, 1(a0) ; RV64I-NEXT: lbu a4, 0(a0) -; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: lbu a0, 3(a0) +; RV64I-NEXT: lbu a5, 3(a0) +; RV64I-NEXT: lbu a0, 2(a0) +; RV64I-NEXT: lbu a6, 3(a1) +; RV64I-NEXT: lbu a7, 1(a1) +; RV64I-NEXT: lbu t0, 2(a1) +; RV64I-NEXT: lbu a1, 0(a1) ; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: lbu a4, 0(a1) -; RV64I-NEXT: lbu a6, 1(a1) -; RV64I-NEXT: lbu a7, 2(a1) -; RV64I-NEXT: lbu a1, 3(a1) -; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a0, a5, a0 ; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a4, a6, a4 -; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a4, a6, t0 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a1, a7, a1 +; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: slli a0, a0, 16 -; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: or a1, a4, a1 ; RV64I-NEXT: or a0, a0, a3 -; RV64I-NEXT: or a1, a1, a4 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: sraw a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 16 -; RV64I-NEXT: slli a3, a0, 48 +; RV64I-NEXT: slli a1, a0, 48 +; RV64I-NEXT: srliw a3, a0, 16 +; RV64I-NEXT: srli a1, a1, 48 +; RV64I-NEXT: srli a1, a1, 8 ; RV64I-NEXT: srliw a4, a0, 24 -; RV64I-NEXT: srli a3, a3, 48 -; RV64I-NEXT: srli a3, a3, 8 ; RV64I-NEXT: sb a0, 0(a2) -; RV64I-NEXT: sb a3, 1(a2) -; RV64I-NEXT: sb a1, 2(a2) +; RV64I-NEXT: sb a1, 1(a2) +; RV64I-NEXT: sb a3, 2(a2) ; 
RV64I-NEXT: sb a4, 3(a2) ; RV64I-NEXT: ret ; @@ -196,34 +196,34 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lbu a3, 1(a0) ; RV32I-NEXT: lbu a4, 0(a0) -; RV32I-NEXT: lbu a5, 2(a0) -; RV32I-NEXT: lbu a0, 3(a0) +; RV32I-NEXT: lbu a5, 3(a0) +; RV32I-NEXT: lbu a0, 2(a0) +; RV32I-NEXT: lbu a6, 3(a1) +; RV32I-NEXT: lbu a7, 1(a1) +; RV32I-NEXT: lbu t0, 2(a1) +; RV32I-NEXT: lbu a1, 0(a1) ; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: or a3, a3, a4 -; RV32I-NEXT: lbu a4, 0(a1) -; RV32I-NEXT: lbu a6, 1(a1) -; RV32I-NEXT: lbu a7, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a4, a6, a4 -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a1, a7 +; RV32I-NEXT: or a4, a6, t0 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a1, a7, a1 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli a0, a0, 16 -; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: slli a1, a1, 3 ; RV32I-NEXT: sra a0, a0, a1 -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: slli a3, a0, 16 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srli a3, a0, 16 +; RV32I-NEXT: srli a1, a1, 16 +; RV32I-NEXT: srli a1, a1, 8 ; RV32I-NEXT: srli a4, a0, 24 -; RV32I-NEXT: srli a3, a3, 16 -; RV32I-NEXT: srli a3, a3, 8 ; RV32I-NEXT: sb a0, 0(a2) -; RV32I-NEXT: sb a3, 1(a2) -; RV32I-NEXT: sb a1, 2(a2) +; RV32I-NEXT: sb a1, 1(a2) +; RV32I-NEXT: sb a3, 2(a2) ; RV32I-NEXT: sb a4, 3(a2) ; RV32I-NEXT: ret %src = load i32, ptr %src.ptr, align 1 @@ -237,146 +237,146 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: lshr_8bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: lbu a4, 1(a0) -; RV64I-NEXT: lbu a5, 2(a0) -; 
RV64I-NEXT: lbu a6, 3(a0) -; RV64I-NEXT: lbu a7, 4(a0) -; RV64I-NEXT: lbu t0, 5(a0) -; RV64I-NEXT: lbu t1, 6(a0) -; RV64I-NEXT: lbu a0, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: lbu a5, 0(a1) -; RV64I-NEXT: lbu a6, 1(a1) -; RV64I-NEXT: lbu t2, 2(a1) -; RV64I-NEXT: lbu t3, 3(a1) -; RV64I-NEXT: slli t0, t0, 8 -; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: or a0, a0, t1 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 4(a1) -; RV64I-NEXT: lbu t0, 5(a1) -; RV64I-NEXT: lbu t1, 6(a1) -; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli t3, t3, 8 -; RV64I-NEXT: or t2, t3, t2 -; RV64I-NEXT: slli t0, t0, 8 -; RV64I-NEXT: or a6, t0, a6 -; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or a1, a1, t1 +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a5, 3(a0) +; RV64I-NEXT: lbu a6, 2(a0) +; RV64I-NEXT: lbu a7, 5(a0) +; RV64I-NEXT: lbu t0, 4(a0) +; RV64I-NEXT: lbu t1, 7(a0) +; RV64I-NEXT: lbu a0, 6(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: or a4, a5, a6 +; RV64I-NEXT: slli a7, a7, 8 ; RV64I-NEXT: slli a4, a4, 16 +; RV64I-NEXT: slli t1, t1, 8 ; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lui a4, 16 -; RV64I-NEXT: addi a4, a4, -1 +; RV64I-NEXT: or a0, t1, a0 +; RV64I-NEXT: lbu a4, 0(a1) +; RV64I-NEXT: lbu a5, 1(a1) +; RV64I-NEXT: lbu a6, 2(a1) +; RV64I-NEXT: lbu t1, 3(a1) +; RV64I-NEXT: lbu t2, 4(a1) +; RV64I-NEXT: lbu t3, 5(a1) +; RV64I-NEXT: lbu t4, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: or a7, a7, t0 ; RV64I-NEXT: slli a0, a0, 16 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: slli a1, a1, 16 ; RV64I-NEXT: or a0, a0, a7 -; RV64I-NEXT: or a5, t2, a5 +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or a5, t1, a6 +; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: or a1, a1, t4 +; 
RV64I-NEXT: slli t3, t3, 8 +; RV64I-NEXT: or a6, t3, t2 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: or a1, a1, a6 -; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: or a4, a5, a4 ; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: or a1, a1, a4 ; RV64I-NEXT: or a0, a0, a3 -; RV64I-NEXT: or a1, a1, a5 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: srl a0, a0, a1 -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: srliw a3, a0, 16 -; RV64I-NEXT: and a5, a0, a4 -; RV64I-NEXT: srliw a6, a0, 24 -; RV64I-NEXT: srli a7, a0, 48 -; RV64I-NEXT: srli t0, a0, 56 +; RV64I-NEXT: lui a1, 16 +; RV64I-NEXT: srli a3, a0, 32 +; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: srliw a4, a0, 16 +; RV64I-NEXT: and a5, a0, a1 ; RV64I-NEXT: srli a5, a5, 8 -; RV64I-NEXT: and a4, a1, a4 +; RV64I-NEXT: srliw a6, a0, 24 ; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: sb a5, 1(a2) -; RV64I-NEXT: sb a3, 2(a2) +; RV64I-NEXT: sb a4, 2(a2) ; RV64I-NEXT: sb a6, 3(a2) -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a1, 4(a2) -; RV64I-NEXT: sb a4, 5(a2) -; RV64I-NEXT: sb a7, 6(a2) -; RV64I-NEXT: sb t0, 7(a2) +; RV64I-NEXT: srli a4, a0, 48 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a1, a1, 8 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: sb a3, 4(a2) +; RV64I-NEXT: sb a1, 5(a2) +; RV64I-NEXT: sb a4, 6(a2) +; RV64I-NEXT: sb a0, 7(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: lshr_8bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: lbu a4, 1(a0) -; RV32I-NEXT: lbu a5, 2(a0) -; RV32I-NEXT: lbu a6, 3(a0) +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 3(a0) +; RV32I-NEXT: lbu a5, 0(a0) +; RV32I-NEXT: lbu a6, 2(a0) ; RV32I-NEXT: lbu a7, 4(a0) ; RV32I-NEXT: lbu t0, 5(a0) ; RV32I-NEXT: lbu t1, 6(a0) ; RV32I-NEXT: lbu a0, 7(a0) +; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a5, t0, a7 -; RV32I-NEXT: lbu a6, 0(a1) -; 
RV32I-NEXT: lbu a7, 1(a1) -; RV32I-NEXT: lbu t0, 2(a1) +; RV32I-NEXT: lbu t2, 0(a1) +; RV32I-NEXT: lbu t3, 1(a1) +; RV32I-NEXT: lbu t4, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: or a5, a3, a5 +; RV32I-NEXT: or a3, a4, a6 +; RV32I-NEXT: slli t0, t0, 8 ; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or t1, a0, t1 -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: or a4, t0, a7 +; RV32I-NEXT: or a0, a0, t1 +; RV32I-NEXT: slli t3, t3, 8 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a1, t0 -; RV32I-NEXT: slli a0, a4, 16 -; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: slli a3, t1, 16 +; RV32I-NEXT: or a6, t3, t2 +; RV32I-NEXT: or a1, a1, t4 +; RV32I-NEXT: slli a7, a3, 16 ; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: slli t0, a0, 16 ; RV32I-NEXT: or a1, a1, a6 ; RV32I-NEXT: slli a1, a1, 3 -; RV32I-NEXT: li a4, 32 -; RV32I-NEXT: or a3, a3, a5 -; RV32I-NEXT: bltu a1, a4, .LBB3_2 +; RV32I-NEXT: li a3, 32 +; RV32I-NEXT: or a0, a7, a5 +; RV32I-NEXT: or a4, t0, a4 +; RV32I-NEXT: bltu a1, a3, .LBB3_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srl a5, a3, a1 +; RV32I-NEXT: srl a5, a4, a1 ; RV32I-NEXT: bnez a1, .LBB3_3 ; RV32I-NEXT: j .LBB3_4 ; RV32I-NEXT: .LBB3_2: -; RV32I-NEXT: srl a5, a0, a1 -; RV32I-NEXT: neg a6, a1 -; RV32I-NEXT: sll a6, a3, a6 -; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: neg a5, a1 +; RV32I-NEXT: srl a6, a0, a1 +; RV32I-NEXT: sll a5, a4, a5 +; RV32I-NEXT: or a5, a6, a5 ; RV32I-NEXT: beqz a1, .LBB3_4 ; RV32I-NEXT: .LBB3_3: ; RV32I-NEXT: mv a0, a5 ; RV32I-NEXT: .LBB3_4: -; RV32I-NEXT: bltu a1, a4, .LBB3_6 +; RV32I-NEXT: bltu a1, a3, .LBB3_6 ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: j .LBB3_7 ; RV32I-NEXT: .LBB3_6: -; RV32I-NEXT: srl a1, a3, a1 +; RV32I-NEXT: srl a1, a4, a1 ; RV32I-NEXT: .LBB3_7: -; RV32I-NEXT: srli a3, a0, 16 -; RV32I-NEXT: lui a4, 16 -; RV32I-NEXT: srli a5, a0, 24 -; RV32I-NEXT: srli a6, a1, 16 -; RV32I-NEXT: srli a7, a1, 24 -; RV32I-NEXT: addi a4, a4, -1 -; RV32I-NEXT: and t0, a0, a4 -; RV32I-NEXT: and 
a4, a1, a4 -; RV32I-NEXT: srli t0, t0, 8 -; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: lui a3, 16 +; RV32I-NEXT: addi a3, a3, -1 +; RV32I-NEXT: srli a4, a0, 16 +; RV32I-NEXT: and a5, a0, a3 +; RV32I-NEXT: srli a6, a0, 24 +; RV32I-NEXT: srli a5, a5, 8 ; RV32I-NEXT: sb a0, 0(a2) -; RV32I-NEXT: sb t0, 1(a2) -; RV32I-NEXT: sb a3, 2(a2) -; RV32I-NEXT: sb a5, 3(a2) +; RV32I-NEXT: sb a5, 1(a2) +; RV32I-NEXT: sb a4, 2(a2) +; RV32I-NEXT: sb a6, 3(a2) +; RV32I-NEXT: srli a0, a1, 16 +; RV32I-NEXT: and a3, a1, a3 +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: srli a4, a1, 24 ; RV32I-NEXT: sb a1, 4(a2) -; RV32I-NEXT: sb a4, 5(a2) -; RV32I-NEXT: sb a6, 6(a2) -; RV32I-NEXT: sb a7, 7(a2) +; RV32I-NEXT: sb a3, 5(a2) +; RV32I-NEXT: sb a0, 6(a2) +; RV32I-NEXT: sb a4, 7(a2) ; RV32I-NEXT: ret %src = load i64, ptr %src.ptr, align 1 %byteOff = load i64, ptr %byteOff.ptr, align 1 @@ -388,107 +388,107 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: shl_8bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: lbu a4, 1(a0) -; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: lbu a6, 3(a0) -; RV64I-NEXT: lbu a7, 4(a0) -; RV64I-NEXT: lbu t0, 5(a0) -; RV64I-NEXT: lbu t1, 6(a0) -; RV64I-NEXT: lbu a0, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: lbu a5, 0(a1) -; RV64I-NEXT: lbu a6, 1(a1) -; RV64I-NEXT: lbu t2, 2(a1) -; RV64I-NEXT: lbu t3, 3(a1) -; RV64I-NEXT: slli t0, t0, 8 -; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: or a0, a0, t1 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 4(a1) -; RV64I-NEXT: lbu t0, 5(a1) -; RV64I-NEXT: lbu t1, 6(a1) -; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli t3, t3, 8 -; RV64I-NEXT: or t2, t3, t2 -; RV64I-NEXT: slli t0, t0, 8 -; RV64I-NEXT: or a6, t0, a6 -; RV64I-NEXT: slli a1, a1, 8 -; 
RV64I-NEXT: or a1, a1, t1 +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a5, 3(a0) +; RV64I-NEXT: lbu a6, 2(a0) +; RV64I-NEXT: lbu a7, 5(a0) +; RV64I-NEXT: lbu t0, 4(a0) +; RV64I-NEXT: lbu t1, 7(a0) +; RV64I-NEXT: lbu a0, 6(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: or a4, a5, a6 +; RV64I-NEXT: slli a7, a7, 8 ; RV64I-NEXT: slli a4, a4, 16 +; RV64I-NEXT: slli t1, t1, 8 ; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lui a4, 16 -; RV64I-NEXT: addi a4, a4, -1 +; RV64I-NEXT: or a0, t1, a0 +; RV64I-NEXT: lbu a4, 0(a1) +; RV64I-NEXT: lbu a5, 1(a1) +; RV64I-NEXT: lbu a6, 2(a1) +; RV64I-NEXT: lbu t1, 3(a1) +; RV64I-NEXT: lbu t2, 4(a1) +; RV64I-NEXT: lbu t3, 5(a1) +; RV64I-NEXT: lbu t4, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: or a7, a7, t0 ; RV64I-NEXT: slli a0, a0, 16 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: slli a1, a1, 16 ; RV64I-NEXT: or a0, a0, a7 -; RV64I-NEXT: or a5, t2, a5 +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or a5, t1, a6 +; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: or a1, a1, t4 +; RV64I-NEXT: slli t3, t3, 8 +; RV64I-NEXT: or a6, t3, t2 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: or a1, a1, a6 -; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: or a4, a5, a4 ; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: or a1, a1, a4 ; RV64I-NEXT: or a0, a0, a3 -; RV64I-NEXT: or a1, a1, a5 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: sll a0, a0, a1 -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: srliw a3, a0, 16 -; RV64I-NEXT: and a5, a0, a4 -; RV64I-NEXT: srliw a6, a0, 24 -; RV64I-NEXT: srli a7, a0, 48 -; RV64I-NEXT: srli t0, a0, 56 +; RV64I-NEXT: lui a1, 16 +; RV64I-NEXT: srli a3, a0, 32 +; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: srliw a4, a0, 16 +; RV64I-NEXT: and a5, a0, a1 ; RV64I-NEXT: srli a5, a5, 8 -; RV64I-NEXT: and a4, a1, a4 +; RV64I-NEXT: srliw 
a6, a0, 24 ; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: sb a5, 1(a2) -; RV64I-NEXT: sb a3, 2(a2) +; RV64I-NEXT: sb a4, 2(a2) ; RV64I-NEXT: sb a6, 3(a2) -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a1, 4(a2) -; RV64I-NEXT: sb a4, 5(a2) -; RV64I-NEXT: sb a7, 6(a2) -; RV64I-NEXT: sb t0, 7(a2) +; RV64I-NEXT: srli a4, a0, 48 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a1, a1, 8 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: sb a3, 4(a2) +; RV64I-NEXT: sb a1, 5(a2) +; RV64I-NEXT: sb a4, 6(a2) +; RV64I-NEXT: sb a0, 7(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: shl_8bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: lbu a4, 1(a0) -; RV32I-NEXT: lbu a5, 2(a0) -; RV32I-NEXT: lbu a6, 3(a0) +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 3(a0) +; RV32I-NEXT: lbu a5, 0(a0) +; RV32I-NEXT: lbu a6, 2(a0) ; RV32I-NEXT: lbu a7, 4(a0) ; RV32I-NEXT: lbu t0, 5(a0) ; RV32I-NEXT: lbu t1, 6(a0) ; RV32I-NEXT: lbu a0, 7(a0) +; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a5, t0, a7 -; RV32I-NEXT: lbu a6, 0(a1) -; RV32I-NEXT: lbu a7, 1(a1) -; RV32I-NEXT: lbu t0, 2(a1) +; RV32I-NEXT: lbu t2, 0(a1) +; RV32I-NEXT: lbu t3, 1(a1) +; RV32I-NEXT: lbu t4, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: or a5, a3, a5 +; RV32I-NEXT: or a3, a4, a6 +; RV32I-NEXT: slli t0, t0, 8 ; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: or a6, t0, a7 ; RV32I-NEXT: or a0, a0, t1 -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: slli t3, t3, 8 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a1, t0 -; RV32I-NEXT: slli a4, a4, 16 -; RV32I-NEXT: or a4, a4, a3 -; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: or a4, t3, t2 +; RV32I-NEXT: or a1, a1, t4 +; RV32I-NEXT: slli a7, a3, 16 ; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: or a3, a1, a6 -; RV32I-NEXT: slli a3, a3, 3 +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: or a1, a1, a4 +; 
RV32I-NEXT: slli a3, a1, 3 ; RV32I-NEXT: li a1, 32 -; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: or a4, a7, a5 +; RV32I-NEXT: or a0, a0, a6 ; RV32I-NEXT: bltu a3, a1, .LBB4_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: li a1, 0 @@ -496,33 +496,33 @@ define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: bnez a3, .LBB4_3 ; RV32I-NEXT: j .LBB4_4 ; RV32I-NEXT: .LBB4_2: +; RV32I-NEXT: neg a1, a3 +; RV32I-NEXT: srl a5, a4, a1 +; RV32I-NEXT: sll a6, a0, a3 ; RV32I-NEXT: sll a1, a4, a3 -; RV32I-NEXT: neg a5, a3 -; RV32I-NEXT: srl a4, a4, a5 -; RV32I-NEXT: sll a5, a0, a3 -; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: or a4, a5, a6 ; RV32I-NEXT: beqz a3, .LBB4_4 ; RV32I-NEXT: .LBB4_3: ; RV32I-NEXT: mv a0, a4 ; RV32I-NEXT: .LBB4_4: -; RV32I-NEXT: srli a3, a1, 16 -; RV32I-NEXT: lui a4, 16 -; RV32I-NEXT: srli a5, a1, 24 -; RV32I-NEXT: srli a6, a0, 16 -; RV32I-NEXT: srli a7, a0, 24 -; RV32I-NEXT: addi a4, a4, -1 -; RV32I-NEXT: and t0, a1, a4 -; RV32I-NEXT: and a4, a0, a4 -; RV32I-NEXT: srli t0, t0, 8 -; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: lui a3, 16 +; RV32I-NEXT: addi a3, a3, -1 +; RV32I-NEXT: srli a4, a1, 16 +; RV32I-NEXT: and a5, a1, a3 +; RV32I-NEXT: srli a6, a1, 24 +; RV32I-NEXT: srli a5, a5, 8 ; RV32I-NEXT: sb a1, 0(a2) -; RV32I-NEXT: sb t0, 1(a2) -; RV32I-NEXT: sb a3, 2(a2) -; RV32I-NEXT: sb a5, 3(a2) +; RV32I-NEXT: sb a5, 1(a2) +; RV32I-NEXT: sb a4, 2(a2) +; RV32I-NEXT: sb a6, 3(a2) +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: and a3, a0, a3 +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: srli a4, a0, 24 ; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: sb a4, 5(a2) -; RV32I-NEXT: sb a6, 6(a2) -; RV32I-NEXT: sb a7, 7(a2) +; RV32I-NEXT: sb a3, 5(a2) +; RV32I-NEXT: sb a1, 6(a2) +; RV32I-NEXT: sb a4, 7(a2) ; RV32I-NEXT: ret %src = load i64, ptr %src.ptr, align 1 %byteOff = load i64, ptr %byteOff.ptr, align 1 @@ -534,117 +534,117 @@ define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { define void @ashr_8bytes(ptr %src.ptr, ptr 
%byteOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: ashr_8bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: lbu a4, 1(a0) -; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: lbu a6, 3(a0) -; RV64I-NEXT: lbu a7, 4(a0) -; RV64I-NEXT: lbu t0, 5(a0) -; RV64I-NEXT: lbu t1, 6(a0) -; RV64I-NEXT: lbu a0, 7(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: lbu a5, 0(a1) -; RV64I-NEXT: lbu a6, 1(a1) -; RV64I-NEXT: lbu t2, 2(a1) -; RV64I-NEXT: lbu t3, 3(a1) -; RV64I-NEXT: slli t0, t0, 8 -; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: or a0, a0, t1 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 4(a1) -; RV64I-NEXT: lbu t0, 5(a1) -; RV64I-NEXT: lbu t1, 6(a1) -; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli t3, t3, 8 -; RV64I-NEXT: or t2, t3, t2 -; RV64I-NEXT: slli t0, t0, 8 -; RV64I-NEXT: or a6, t0, a6 -; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or a1, a1, t1 +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a5, 3(a0) +; RV64I-NEXT: lbu a6, 2(a0) +; RV64I-NEXT: lbu a7, 5(a0) +; RV64I-NEXT: lbu t0, 4(a0) +; RV64I-NEXT: lbu t1, 7(a0) +; RV64I-NEXT: lbu a0, 6(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: or a4, a5, a6 +; RV64I-NEXT: slli a7, a7, 8 ; RV64I-NEXT: slli a4, a4, 16 +; RV64I-NEXT: slli t1, t1, 8 ; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lui a4, 16 -; RV64I-NEXT: addi a4, a4, -1 +; RV64I-NEXT: or a0, t1, a0 +; RV64I-NEXT: lbu a4, 0(a1) +; RV64I-NEXT: lbu a5, 1(a1) +; RV64I-NEXT: lbu a6, 2(a1) +; RV64I-NEXT: lbu t1, 3(a1) +; RV64I-NEXT: lbu t2, 4(a1) +; RV64I-NEXT: lbu t3, 5(a1) +; RV64I-NEXT: lbu t4, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: or a7, a7, t0 ; RV64I-NEXT: slli a0, a0, 16 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: slli a1, a1, 16 ; RV64I-NEXT: or a0, a0, a7 -; RV64I-NEXT: or a5, t2, a5 +; RV64I-NEXT: 
slli a5, a5, 8 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or a5, t1, a6 +; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: or a1, a1, t4 +; RV64I-NEXT: slli t3, t3, 8 +; RV64I-NEXT: or a6, t3, t2 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: or a1, a1, a6 -; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: or a4, a5, a4 ; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: or a1, a1, a4 ; RV64I-NEXT: or a0, a0, a3 -; RV64I-NEXT: or a1, a1, a5 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: sra a0, a0, a1 -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: srliw a3, a0, 16 -; RV64I-NEXT: and a5, a0, a4 -; RV64I-NEXT: srliw a6, a0, 24 -; RV64I-NEXT: srli a7, a0, 48 -; RV64I-NEXT: srli t0, a0, 56 +; RV64I-NEXT: lui a1, 16 +; RV64I-NEXT: srli a3, a0, 32 +; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: srliw a4, a0, 16 +; RV64I-NEXT: and a5, a0, a1 ; RV64I-NEXT: srli a5, a5, 8 -; RV64I-NEXT: and a4, a1, a4 +; RV64I-NEXT: srliw a6, a0, 24 ; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: sb a5, 1(a2) -; RV64I-NEXT: sb a3, 2(a2) +; RV64I-NEXT: sb a4, 2(a2) ; RV64I-NEXT: sb a6, 3(a2) -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a1, 4(a2) -; RV64I-NEXT: sb a4, 5(a2) -; RV64I-NEXT: sb a7, 6(a2) -; RV64I-NEXT: sb t0, 7(a2) +; RV64I-NEXT: srli a4, a0, 48 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a1, a1, 8 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: sb a3, 4(a2) +; RV64I-NEXT: sb a1, 5(a2) +; RV64I-NEXT: sb a4, 6(a2) +; RV64I-NEXT: sb a0, 7(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: ashr_8bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: lbu a4, 1(a0) -; RV32I-NEXT: lbu a5, 2(a0) -; RV32I-NEXT: lbu a6, 3(a0) +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 3(a0) +; RV32I-NEXT: lbu a5, 0(a0) +; RV32I-NEXT: lbu a6, 2(a0) ; RV32I-NEXT: lbu a7, 4(a0) ; RV32I-NEXT: lbu t0, 5(a0) ; RV32I-NEXT: lbu t1, 6(a0) ; RV32I-NEXT: lbu a0, 7(a0) +; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: slli a4, a4, 8 -; 
RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a5, t0, a7 -; RV32I-NEXT: lbu a6, 0(a1) -; RV32I-NEXT: lbu a7, 1(a1) -; RV32I-NEXT: lbu t0, 2(a1) +; RV32I-NEXT: lbu t2, 0(a1) +; RV32I-NEXT: lbu t3, 1(a1) +; RV32I-NEXT: lbu t4, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: or a3, a3, a5 +; RV32I-NEXT: or a4, a4, a6 +; RV32I-NEXT: slli t0, t0, 8 ; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or t1, a0, t1 -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: or a5, t0, a7 +; RV32I-NEXT: or a0, a0, t1 +; RV32I-NEXT: slli t3, t3, 8 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a1, t0 -; RV32I-NEXT: slli a0, a4, 16 -; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: slli a3, t1, 16 +; RV32I-NEXT: or a6, t3, t2 +; RV32I-NEXT: or a1, a1, t4 +; RV32I-NEXT: slli a7, a4, 16 ; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: slli t0, a0, 16 ; RV32I-NEXT: or a1, a1, a6 ; RV32I-NEXT: slli a1, a1, 3 ; RV32I-NEXT: li a4, 32 -; RV32I-NEXT: or a3, a3, a5 +; RV32I-NEXT: or a0, a7, a3 +; RV32I-NEXT: or a3, t0, a5 ; RV32I-NEXT: bltu a1, a4, .LBB5_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sra a5, a3, a1 ; RV32I-NEXT: bnez a1, .LBB5_3 ; RV32I-NEXT: j .LBB5_4 ; RV32I-NEXT: .LBB5_2: -; RV32I-NEXT: srl a5, a0, a1 -; RV32I-NEXT: neg a6, a1 -; RV32I-NEXT: sll a6, a3, a6 -; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: neg a5, a1 +; RV32I-NEXT: srl a6, a0, a1 +; RV32I-NEXT: sll a5, a3, a5 +; RV32I-NEXT: or a5, a6, a5 ; RV32I-NEXT: beqz a1, .LBB5_4 ; RV32I-NEXT: .LBB5_3: ; RV32I-NEXT: mv a0, a5 @@ -656,24 +656,24 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: .LBB5_6: ; RV32I-NEXT: sra a1, a3, a1 ; RV32I-NEXT: .LBB5_7: -; RV32I-NEXT: srli a3, a0, 16 -; RV32I-NEXT: lui a4, 16 -; RV32I-NEXT: srli a5, a0, 24 -; RV32I-NEXT: srli a6, a1, 16 -; RV32I-NEXT: srli a7, a1, 24 -; RV32I-NEXT: addi a4, a4, -1 -; RV32I-NEXT: and t0, a0, a4 -; RV32I-NEXT: and a4, a1, a4 -; 
RV32I-NEXT: srli t0, t0, 8 -; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: lui a3, 16 +; RV32I-NEXT: addi a3, a3, -1 +; RV32I-NEXT: srli a4, a0, 16 +; RV32I-NEXT: and a5, a0, a3 +; RV32I-NEXT: srli a6, a0, 24 +; RV32I-NEXT: srli a5, a5, 8 ; RV32I-NEXT: sb a0, 0(a2) -; RV32I-NEXT: sb t0, 1(a2) -; RV32I-NEXT: sb a3, 2(a2) -; RV32I-NEXT: sb a5, 3(a2) +; RV32I-NEXT: sb a5, 1(a2) +; RV32I-NEXT: sb a4, 2(a2) +; RV32I-NEXT: sb a6, 3(a2) +; RV32I-NEXT: srli a0, a1, 16 +; RV32I-NEXT: and a3, a1, a3 +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: srli a4, a1, 24 ; RV32I-NEXT: sb a1, 4(a2) -; RV32I-NEXT: sb a4, 5(a2) -; RV32I-NEXT: sb a6, 6(a2) -; RV32I-NEXT: sb a7, 7(a2) +; RV32I-NEXT: sb a3, 5(a2) +; RV32I-NEXT: sb a0, 6(a2) +; RV32I-NEXT: sb a4, 7(a2) ; RV32I-NEXT: ret %src = load i64, ptr %src.ptr, align 1 %byteOff = load i64, ptr %byteOff.ptr, align 1 @@ -686,136 +686,132 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: lshr_16bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd s0, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: lbu a4, 1(a0) -; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: lbu a6, 3(a0) -; RV64I-NEXT: lbu a7, 4(a0) -; RV64I-NEXT: lbu t0, 5(a0) -; RV64I-NEXT: lbu t1, 6(a0) -; RV64I-NEXT: lbu t2, 7(a0) -; RV64I-NEXT: lbu t3, 8(a0) -; RV64I-NEXT: lbu t4, 9(a0) -; RV64I-NEXT: lbu t5, 10(a0) -; RV64I-NEXT: lbu t6, 11(a0) +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 3(a0) +; RV64I-NEXT: lbu a5, 7(a0) +; RV64I-NEXT: lbu a6, 2(a0) +; RV64I-NEXT: lbu a7, 0(a0) +; RV64I-NEXT: lbu t0, 4(a0) +; RV64I-NEXT: lbu t1, 5(a0) +; RV64I-NEXT: lbu t2, 6(a0) +; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu a6, 13(a0) -; RV64I-NEXT: lbu s0, 14(a0) +; RV64I-NEXT: 
slli a5, a5, 8 +; RV64I-NEXT: or a4, a4, a6 +; RV64I-NEXT: or a3, a3, a7 +; RV64I-NEXT: slli a4, a4, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or a5, a5, t2 +; RV64I-NEXT: or a6, t1, t0 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: lbu a7, 8(a0) +; RV64I-NEXT: lbu t0, 9(a0) +; RV64I-NEXT: lbu t1, 10(a0) +; RV64I-NEXT: lbu t2, 11(a0) +; RV64I-NEXT: lbu t3, 12(a0) +; RV64I-NEXT: lbu t4, 13(a0) +; RV64I-NEXT: lbu t5, 14(a0) ; RV64I-NEXT: lbu a0, 15(a0) +; RV64I-NEXT: or a4, a4, a3 +; RV64I-NEXT: or a3, a5, a6 ; RV64I-NEXT: slli t0, t0, 8 ; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: or t0, t2, t1 -; RV64I-NEXT: or t1, t4, t3 -; RV64I-NEXT: or t2, t6, t5 -; RV64I-NEXT: lbu t3, 0(a1) -; RV64I-NEXT: lbu t4, 1(a1) -; RV64I-NEXT: lbu t5, 2(a1) -; RV64I-NEXT: lbu t6, 3(a1) -; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, t0, a7 +; RV64I-NEXT: or a6, t2, t1 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli a0, a0, 8 ; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a0, a0, s0 -; RV64I-NEXT: or a6, t4, t3 -; RV64I-NEXT: lbu t3, 4(a1) -; RV64I-NEXT: lbu t4, 5(a1) -; RV64I-NEXT: lbu s0, 6(a1) +; RV64I-NEXT: or a0, a0, t5 +; RV64I-NEXT: or a7, t4, t3 +; RV64I-NEXT: slli a0, a0, 16 +; RV64I-NEXT: lbu t0, 0(a1) +; RV64I-NEXT: lbu t1, 1(a1) +; RV64I-NEXT: lbu t2, 2(a1) +; RV64I-NEXT: lbu t3, 3(a1) +; RV64I-NEXT: lbu t4, 4(a1) +; RV64I-NEXT: lbu t5, 5(a1) +; RV64I-NEXT: lbu t6, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: or t5, t6, t5 -; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or t3, t4, t3 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: slli t3, t3, 8 +; RV64I-NEXT: or a6, t1, t0 +; RV64I-NEXT: or a7, t3, t2 +; RV64I-NEXT: slli t5, t5, 8 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or a1, a1, s0 -; RV64I-NEXT: slli a4, a4, 16 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: 
slli t0, t0, 16 -; RV64I-NEXT: or a4, t0, a7 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: or a7, t2, t1 -; RV64I-NEXT: slli a0, a0, 16 -; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: slli t5, t5, 16 -; RV64I-NEXT: or a5, t5, a6 +; RV64I-NEXT: or t0, t5, t4 +; RV64I-NEXT: or a1, a1, t6 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: or a1, a1, t3 -; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: slli a6, a0, 32 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a1, a1, t0 +; RV64I-NEXT: slli a7, a3, 32 ; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: or a0, a4, a3 -; RV64I-NEXT: or a1, a1, a5 +; RV64I-NEXT: slli t0, a0, 32 +; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: slli a1, a1, 3 -; RV64I-NEXT: li a4, 64 -; RV64I-NEXT: or a3, a6, a7 -; RV64I-NEXT: bltu a1, a4, .LBB6_2 +; RV64I-NEXT: li a3, 64 +; RV64I-NEXT: or a0, a7, a4 +; RV64I-NEXT: or a4, t0, a5 +; RV64I-NEXT: bltu a1, a3, .LBB6_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: sub a5, a1, a4 -; RV64I-NEXT: srl a5, a3, a5 +; RV64I-NEXT: sub a5, a1, a3 +; RV64I-NEXT: srl a5, a4, a5 ; RV64I-NEXT: bnez a1, .LBB6_3 ; RV64I-NEXT: j .LBB6_4 ; RV64I-NEXT: .LBB6_2: -; RV64I-NEXT: srl a5, a0, a1 -; RV64I-NEXT: neg a6, a1 -; RV64I-NEXT: sll a6, a3, a6 -; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: neg a5, a1 +; RV64I-NEXT: srl a6, a0, a1 +; RV64I-NEXT: sll a5, a4, a5 +; RV64I-NEXT: or a5, a6, a5 ; RV64I-NEXT: beqz a1, .LBB6_4 ; RV64I-NEXT: .LBB6_3: ; RV64I-NEXT: mv a0, a5 ; RV64I-NEXT: .LBB6_4: -; RV64I-NEXT: bltu a1, a4, .LBB6_6 +; RV64I-NEXT: bltu a1, a3, .LBB6_6 ; RV64I-NEXT: # %bb.5: ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: j .LBB6_7 ; RV64I-NEXT: .LBB6_6: -; RV64I-NEXT: srl a1, a3, a1 +; RV64I-NEXT: srl a1, a4, a1 ; RV64I-NEXT: .LBB6_7: ; RV64I-NEXT: srli a3, a0, 32 -; RV64I-NEXT: srliw a4, a0, 16 -; RV64I-NEXT: lui a5, 16 +; RV64I-NEXT: lui a4, 16 +; RV64I-NEXT: srliw a5, a0, 16 +; RV64I-NEXT: addi a4, a4, -1 ; RV64I-NEXT: srliw a6, a0, 24 -; RV64I-NEXT: srli a7, a0, 48 -; RV64I-NEXT: srli t0, a0, 56 -; RV64I-NEXT: srli 
t1, a1, 32 -; RV64I-NEXT: srliw t2, a1, 16 -; RV64I-NEXT: srliw t3, a1, 24 -; RV64I-NEXT: srli t4, a1, 48 -; RV64I-NEXT: srli t5, a1, 56 -; RV64I-NEXT: addi a5, a5, -1 -; RV64I-NEXT: and t6, a0, a5 -; RV64I-NEXT: srli t6, t6, 8 +; RV64I-NEXT: and a7, a0, a4 +; RV64I-NEXT: srli a7, a7, 8 ; RV64I-NEXT: sb a0, 0(a2) -; RV64I-NEXT: sb t6, 1(a2) -; RV64I-NEXT: sb a4, 2(a2) +; RV64I-NEXT: sb a7, 1(a2) +; RV64I-NEXT: sb a5, 2(a2) ; RV64I-NEXT: sb a6, 3(a2) -; RV64I-NEXT: and a0, a3, a5 -; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: and a5, a3, a4 +; RV64I-NEXT: srli a6, a0, 48 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: sb a3, 4(a2) -; RV64I-NEXT: sb a0, 5(a2) -; RV64I-NEXT: sb a7, 6(a2) -; RV64I-NEXT: sb t0, 7(a2) -; RV64I-NEXT: and a0, a1, a5 -; RV64I-NEXT: and a3, t1, a5 -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: sb a5, 5(a2) +; RV64I-NEXT: sb a6, 6(a2) +; RV64I-NEXT: sb a0, 7(a2) +; RV64I-NEXT: srli a0, a1, 32 +; RV64I-NEXT: srliw a3, a1, 16 +; RV64I-NEXT: and a5, a1, a4 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srliw a6, a1, 24 ; RV64I-NEXT: sb a1, 8(a2) -; RV64I-NEXT: sb a0, 9(a2) -; RV64I-NEXT: sb t2, 10(a2) -; RV64I-NEXT: sb t3, 11(a2) -; RV64I-NEXT: sb t1, 12(a2) -; RV64I-NEXT: sb a3, 13(a2) -; RV64I-NEXT: sb t4, 14(a2) -; RV64I-NEXT: sb t5, 15(a2) -; RV64I-NEXT: ld s0, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: sb a5, 9(a2) +; RV64I-NEXT: sb a3, 10(a2) +; RV64I-NEXT: sb a6, 11(a2) +; RV64I-NEXT: srli a3, a1, 48 +; RV64I-NEXT: and a4, a0, a4 +; RV64I-NEXT: srli a4, a4, 8 +; RV64I-NEXT: srli a1, a1, 56 +; RV64I-NEXT: sb a0, 12(a2) +; RV64I-NEXT: sb a4, 13(a2) +; RV64I-NEXT: sb a3, 14(a2) +; RV64I-NEXT: sb a1, 15(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: lshr_16bytes: @@ -824,189 +820,189 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill ; 
RV32I-NEXT: sw s2, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: lbu a5, 1(a0) -; RV32I-NEXT: lbu a6, 2(a0) +; RV32I-NEXT: lbu a5, 0(a0) +; RV32I-NEXT: lbu a4, 1(a0) +; RV32I-NEXT: lbu a3, 2(a0) ; RV32I-NEXT: lbu a7, 3(a0) -; RV32I-NEXT: lbu a4, 4(a0) +; RV32I-NEXT: lbu a6, 4(a0) ; RV32I-NEXT: lbu t0, 5(a0) -; RV32I-NEXT: lbu t1, 6(a0) -; RV32I-NEXT: lbu t2, 7(a0) -; RV32I-NEXT: lbu t4, 8(a0) +; RV32I-NEXT: lbu t1, 7(a0) +; RV32I-NEXT: lbu t2, 6(a0) +; RV32I-NEXT: lbu t4, 11(a0) ; RV32I-NEXT: lbu t5, 9(a0) ; RV32I-NEXT: lbu t6, 10(a0) -; RV32I-NEXT: lbu s0, 11(a0) +; RV32I-NEXT: lbu s0, 8(a0) ; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t3, a7, a6 -; RV32I-NEXT: or t1, t2, t1 -; RV32I-NEXT: lbu a6, 12(a0) -; RV32I-NEXT: lbu a7, 13(a0) -; RV32I-NEXT: lbu t2, 14(a0) -; RV32I-NEXT: lbu a0, 15(a0) +; RV32I-NEXT: or t3, a7, a3 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or t1, t1, t2 +; RV32I-NEXT: slli t4, t4, 8 ; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or t4, t5, t4 -; RV32I-NEXT: or t5, s0, t6 -; RV32I-NEXT: or t6, a7, a6 -; RV32I-NEXT: lbu a6, 0(a1) -; RV32I-NEXT: lbu a7, 1(a1) -; RV32I-NEXT: lbu s0, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: or a3, t4, t6 +; RV32I-NEXT: or a7, t5, s0 +; RV32I-NEXT: slli a3, a3, 16 +; RV32I-NEXT: lbu t2, 12(a0) +; RV32I-NEXT: lbu t4, 13(a0) +; RV32I-NEXT: lbu t5, 14(a0) +; RV32I-NEXT: lbu a0, 15(a0) +; RV32I-NEXT: lbu t6, 0(a1) +; RV32I-NEXT: lbu s0, 1(a1) +; RV32I-NEXT: lbu s1, 2(a1) +; RV32I-NEXT: lbu s2, 3(a1) +; RV32I-NEXT: or a3, a3, a7 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: or a7, t4, t2 ; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or t2, a0, t2 -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or s1, a7, a6 -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or s0, a1, s0 -; RV32I-NEXT: li a7, 32 -; RV32I-NEXT: slli a1, a5, 8 +; RV32I-NEXT: or t2, a0, t5 +; RV32I-NEXT: slli a1, a4, 8 ; RV32I-NEXT: slli a0, 
t0, 8 -; RV32I-NEXT: slli t5, t5, 16 +; RV32I-NEXT: slli s2, s2, 8 +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: or a4, s2, s1 +; RV32I-NEXT: or t0, s0, t6 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli t2, t2, 16 -; RV32I-NEXT: slli s0, s0, 16 -; RV32I-NEXT: or a6, t5, t4 -; RV32I-NEXT: or t0, t2, t6 -; RV32I-NEXT: or a5, s0, s1 -; RV32I-NEXT: slli a5, a5, 3 -; RV32I-NEXT: srl t2, a6, a5 -; RV32I-NEXT: neg t5, a5 -; RV32I-NEXT: sll t4, t0, t5 -; RV32I-NEXT: bltu a5, a7, .LBB6_2 +; RV32I-NEXT: or a4, a4, t0 +; RV32I-NEXT: or a7, t2, a7 +; RV32I-NEXT: slli a4, a4, 3 +; RV32I-NEXT: li t0, 32 +; RV32I-NEXT: neg t4, a4 +; RV32I-NEXT: srl t2, a3, a4 +; RV32I-NEXT: sll t5, a7, t4 +; RV32I-NEXT: bltu a4, t0, .LBB6_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srl t6, t0, a5 +; RV32I-NEXT: srl t6, a7, a4 ; RV32I-NEXT: j .LBB6_3 ; RV32I-NEXT: .LBB6_2: -; RV32I-NEXT: or t6, t2, t4 +; RV32I-NEXT: or t6, t2, t5 ; RV32I-NEXT: .LBB6_3: -; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a1, a1, a5 ; RV32I-NEXT: slli t3, t3, 16 -; RV32I-NEXT: or a3, a0, a4 +; RV32I-NEXT: or a5, a0, a6 ; RV32I-NEXT: slli t1, t1, 16 -; RV32I-NEXT: mv a0, a6 -; RV32I-NEXT: beqz a5, .LBB6_5 +; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: beqz a4, .LBB6_5 ; RV32I-NEXT: # %bb.4: ; RV32I-NEXT: mv a0, t6 ; RV32I-NEXT: .LBB6_5: -; RV32I-NEXT: or a4, t3, a1 -; RV32I-NEXT: or a3, t1, a3 -; RV32I-NEXT: bltu a5, a7, .LBB6_7 +; RV32I-NEXT: or a6, t3, a1 +; RV32I-NEXT: or a5, t1, a5 +; RV32I-NEXT: bltu a4, t0, .LBB6_7 ; RV32I-NEXT: # %bb.6: ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: srl t6, a3, a5 +; RV32I-NEXT: srl t6, a5, a4 ; RV32I-NEXT: j .LBB6_8 ; RV32I-NEXT: .LBB6_7: -; RV32I-NEXT: srl a1, t0, a5 -; RV32I-NEXT: srl t1, a4, a5 -; RV32I-NEXT: sll t3, a3, t5 +; RV32I-NEXT: srl t1, a6, a4 +; RV32I-NEXT: sll t3, a5, t4 +; RV32I-NEXT: srl a1, a7, a4 ; RV32I-NEXT: or t6, t1, t3 ; RV32I-NEXT: .LBB6_8: ; RV32I-NEXT: li t1, 64 -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: beqz a5, .LBB6_10 +; RV32I-NEXT: mv t3, a6 +; RV32I-NEXT: beqz a4, 
.LBB6_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: mv t3, t6 ; RV32I-NEXT: .LBB6_10: -; RV32I-NEXT: sub s0, t1, a5 -; RV32I-NEXT: bltu a5, a7, .LBB6_13 +; RV32I-NEXT: sub s0, t1, a4 +; RV32I-NEXT: bltu a4, t0, .LBB6_13 ; RV32I-NEXT: # %bb.11: ; RV32I-NEXT: li t6, 0 -; RV32I-NEXT: bgeu s0, a7, .LBB6_14 +; RV32I-NEXT: bgeu s0, t0, .LBB6_14 ; RV32I-NEXT: .LBB6_12: -; RV32I-NEXT: sll t5, a6, t5 ; RV32I-NEXT: neg s1, s0 -; RV32I-NEXT: srl s1, a6, s1 -; RV32I-NEXT: or s2, s1, t4 +; RV32I-NEXT: srl s1, a3, s1 +; RV32I-NEXT: sll t4, a3, t4 +; RV32I-NEXT: or s2, s1, t5 ; RV32I-NEXT: j .LBB6_15 ; RV32I-NEXT: .LBB6_13: -; RV32I-NEXT: srl t6, a3, a5 -; RV32I-NEXT: bltu s0, a7, .LBB6_12 +; RV32I-NEXT: srl t6, a5, a4 +; RV32I-NEXT: bltu s0, t0, .LBB6_12 ; RV32I-NEXT: .LBB6_14: -; RV32I-NEXT: li t5, 0 -; RV32I-NEXT: sll s2, a6, s0 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: sll s2, a3, s0 ; RV32I-NEXT: .LBB6_15: -; RV32I-NEXT: addi s1, a5, -64 -; RV32I-NEXT: mv t4, t0 +; RV32I-NEXT: addi s1, a4, -64 +; RV32I-NEXT: mv t5, a7 ; RV32I-NEXT: beqz s0, .LBB6_17 ; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: mv t5, s2 ; RV32I-NEXT: .LBB6_17: -; RV32I-NEXT: bltu s1, a7, .LBB6_19 +; RV32I-NEXT: bltu s1, t0, .LBB6_19 ; RV32I-NEXT: # %bb.18: -; RV32I-NEXT: srl t2, t0, s1 +; RV32I-NEXT: srl t2, a7, s1 ; RV32I-NEXT: bnez s1, .LBB6_20 ; RV32I-NEXT: j .LBB6_21 ; RV32I-NEXT: .LBB6_19: ; RV32I-NEXT: neg s0, s1 -; RV32I-NEXT: sll s0, t0, s0 +; RV32I-NEXT: sll s0, a7, s0 ; RV32I-NEXT: or t2, t2, s0 ; RV32I-NEXT: beqz s1, .LBB6_21 ; RV32I-NEXT: .LBB6_20: -; RV32I-NEXT: mv a6, t2 +; RV32I-NEXT: mv a3, t2 ; RV32I-NEXT: .LBB6_21: -; RV32I-NEXT: bltu s1, a7, .LBB6_23 +; RV32I-NEXT: bltu s1, t0, .LBB6_23 ; RV32I-NEXT: # %bb.22: ; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: bltu a5, t1, .LBB6_24 +; RV32I-NEXT: bltu a4, t1, .LBB6_24 ; RV32I-NEXT: j .LBB6_25 ; RV32I-NEXT: .LBB6_23: -; RV32I-NEXT: srl a7, t0, a5 -; RV32I-NEXT: bgeu a5, t1, .LBB6_25 +; RV32I-NEXT: srl a7, a7, a4 +; RV32I-NEXT: bgeu a4, t1, 
.LBB6_25 ; RV32I-NEXT: .LBB6_24: -; RV32I-NEXT: or a6, t3, t5 -; RV32I-NEXT: or a7, t6, t4 +; RV32I-NEXT: or a3, t3, t4 +; RV32I-NEXT: or a7, t6, t5 ; RV32I-NEXT: .LBB6_25: -; RV32I-NEXT: bnez a5, .LBB6_29 +; RV32I-NEXT: bnez a4, .LBB6_29 ; RV32I-NEXT: # %bb.26: -; RV32I-NEXT: bltu a5, t1, .LBB6_28 +; RV32I-NEXT: bltu a4, t1, .LBB6_28 ; RV32I-NEXT: .LBB6_27: ; RV32I-NEXT: li a0, 0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: .LBB6_28: -; RV32I-NEXT: srli a5, a4, 16 -; RV32I-NEXT: lui a6, 16 -; RV32I-NEXT: srli a7, a4, 24 -; RV32I-NEXT: srli t0, a3, 16 -; RV32I-NEXT: srli t1, a3, 24 -; RV32I-NEXT: srli t2, a0, 16 -; RV32I-NEXT: srli t3, a0, 24 -; RV32I-NEXT: srli t4, a1, 16 -; RV32I-NEXT: srli t5, a1, 24 -; RV32I-NEXT: addi a6, a6, -1 -; RV32I-NEXT: and t6, a4, a6 -; RV32I-NEXT: srli t6, t6, 8 -; RV32I-NEXT: sb a4, 0(a2) -; RV32I-NEXT: sb t6, 1(a2) -; RV32I-NEXT: sb a5, 2(a2) -; RV32I-NEXT: sb a7, 3(a2) -; RV32I-NEXT: and a4, a3, a6 -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a3, 4(a2) -; RV32I-NEXT: sb a4, 5(a2) -; RV32I-NEXT: sb t0, 6(a2) -; RV32I-NEXT: sb t1, 7(a2) -; RV32I-NEXT: and a3, a0, a6 -; RV32I-NEXT: and a4, a1, a6 -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: lui a3, 16 +; RV32I-NEXT: addi a3, a3, -1 +; RV32I-NEXT: srli a4, a6, 16 +; RV32I-NEXT: and a7, a6, a3 +; RV32I-NEXT: srli t0, a6, 24 +; RV32I-NEXT: srli a7, a7, 8 +; RV32I-NEXT: sb a6, 0(a2) +; RV32I-NEXT: sb a7, 1(a2) +; RV32I-NEXT: sb a4, 2(a2) +; RV32I-NEXT: sb t0, 3(a2) +; RV32I-NEXT: srli a4, a5, 16 +; RV32I-NEXT: and a6, a5, a3 +; RV32I-NEXT: srli a6, a6, 8 +; RV32I-NEXT: srli a7, a5, 24 +; RV32I-NEXT: sb a5, 4(a2) +; RV32I-NEXT: sb a6, 5(a2) +; RV32I-NEXT: sb a4, 6(a2) +; RV32I-NEXT: sb a7, 7(a2) +; RV32I-NEXT: srli a4, a0, 16 +; RV32I-NEXT: and a5, a0, a3 +; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: srli a6, a0, 24 ; RV32I-NEXT: sb a0, 8(a2) -; RV32I-NEXT: sb a3, 9(a2) -; RV32I-NEXT: sb t2, 10(a2) -; RV32I-NEXT: sb t3, 11(a2) +; RV32I-NEXT: sb a5, 9(a2) +; 
RV32I-NEXT: sb a4, 10(a2) +; RV32I-NEXT: sb a6, 11(a2) +; RV32I-NEXT: srli a0, a1, 16 +; RV32I-NEXT: and a3, a1, a3 +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: srli a4, a1, 24 ; RV32I-NEXT: sb a1, 12(a2) -; RV32I-NEXT: sb a4, 13(a2) -; RV32I-NEXT: sb t4, 14(a2) -; RV32I-NEXT: sb t5, 15(a2) +; RV32I-NEXT: sb a3, 13(a2) +; RV32I-NEXT: sb a0, 14(a2) +; RV32I-NEXT: sb a4, 15(a2) ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB6_29: -; RV32I-NEXT: mv a4, a6 -; RV32I-NEXT: mv a3, a7 -; RV32I-NEXT: bgeu a5, t1, .LBB6_27 +; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: mv a5, a7 +; RV32I-NEXT: bgeu a4, t1, .LBB6_27 ; RV32I-NEXT: j .LBB6_28 %src = load i128, ptr %src.ptr, align 1 %byteOff = load i128, ptr %byteOff.ptr, align 1 @@ -1019,136 +1015,132 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { define void @lshr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: lshr_16bytes_wordOff: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd s0, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: lbu a4, 1(a0) -; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: lbu a6, 3(a0) -; RV64I-NEXT: lbu a7, 4(a0) -; RV64I-NEXT: lbu t0, 5(a0) -; RV64I-NEXT: lbu t1, 6(a0) -; RV64I-NEXT: lbu t2, 7(a0) -; RV64I-NEXT: lbu t3, 8(a0) -; RV64I-NEXT: lbu t4, 9(a0) -; RV64I-NEXT: lbu t5, 10(a0) -; RV64I-NEXT: lbu t6, 11(a0) +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 3(a0) +; RV64I-NEXT: lbu a5, 7(a0) +; RV64I-NEXT: lbu a6, 2(a0) +; RV64I-NEXT: lbu a7, 0(a0) +; RV64I-NEXT: lbu t0, 4(a0) +; RV64I-NEXT: lbu t1, 5(a0) +; RV64I-NEXT: lbu t2, 6(a0) +; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu a6, 
13(a0) -; RV64I-NEXT: lbu s0, 14(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a4, a6 +; RV64I-NEXT: or a3, a3, a7 +; RV64I-NEXT: slli a4, a4, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or a5, a5, t2 +; RV64I-NEXT: or a6, t1, t0 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: lbu a7, 8(a0) +; RV64I-NEXT: lbu t0, 9(a0) +; RV64I-NEXT: lbu t1, 10(a0) +; RV64I-NEXT: lbu t2, 11(a0) +; RV64I-NEXT: lbu t3, 12(a0) +; RV64I-NEXT: lbu t4, 13(a0) +; RV64I-NEXT: lbu t5, 14(a0) ; RV64I-NEXT: lbu a0, 15(a0) +; RV64I-NEXT: or a4, a4, a3 +; RV64I-NEXT: or a3, a5, a6 ; RV64I-NEXT: slli t0, t0, 8 ; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: or t0, t2, t1 -; RV64I-NEXT: or t1, t4, t3 -; RV64I-NEXT: or t2, t6, t5 -; RV64I-NEXT: lbu t3, 0(a1) -; RV64I-NEXT: lbu t4, 1(a1) -; RV64I-NEXT: lbu t5, 2(a1) -; RV64I-NEXT: lbu t6, 3(a1) -; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, t0, a7 +; RV64I-NEXT: or a6, t2, t1 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli a0, a0, 8 ; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a0, a0, s0 -; RV64I-NEXT: or a6, t4, t3 -; RV64I-NEXT: lbu t3, 4(a1) -; RV64I-NEXT: lbu t4, 5(a1) -; RV64I-NEXT: lbu s0, 6(a1) +; RV64I-NEXT: or a0, a0, t5 +; RV64I-NEXT: or a7, t4, t3 +; RV64I-NEXT: slli a0, a0, 16 +; RV64I-NEXT: lbu t0, 0(a1) +; RV64I-NEXT: lbu t1, 1(a1) +; RV64I-NEXT: lbu t2, 2(a1) +; RV64I-NEXT: lbu t3, 3(a1) +; RV64I-NEXT: lbu t4, 4(a1) +; RV64I-NEXT: lbu t5, 5(a1) +; RV64I-NEXT: lbu t6, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: or t5, t6, t5 -; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or t3, t4, t3 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: slli t3, t3, 8 +; RV64I-NEXT: or a6, t1, t0 +; RV64I-NEXT: or a7, t3, t2 +; RV64I-NEXT: slli t5, t5, 8 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or a1, a1, s0 -; RV64I-NEXT: slli a4, 
a4, 16 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: slli t0, t0, 16 -; RV64I-NEXT: or a4, t0, a7 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: or a7, t2, t1 -; RV64I-NEXT: slli a0, a0, 16 -; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: slli t5, t5, 16 -; RV64I-NEXT: or a5, t5, a6 +; RV64I-NEXT: or t0, t5, t4 +; RV64I-NEXT: or a1, a1, t6 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: or a1, a1, t3 -; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: slli a6, a0, 32 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a1, a1, t0 +; RV64I-NEXT: slli a7, a3, 32 ; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: or a0, a4, a3 -; RV64I-NEXT: or a1, a1, a5 +; RV64I-NEXT: slli t0, a0, 32 +; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: slli a1, a1, 5 -; RV64I-NEXT: li a4, 64 -; RV64I-NEXT: or a3, a6, a7 -; RV64I-NEXT: bltu a1, a4, .LBB7_2 +; RV64I-NEXT: li a3, 64 +; RV64I-NEXT: or a0, a7, a4 +; RV64I-NEXT: or a4, t0, a5 +; RV64I-NEXT: bltu a1, a3, .LBB7_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: sub a5, a1, a4 -; RV64I-NEXT: srl a5, a3, a5 +; RV64I-NEXT: sub a5, a1, a3 +; RV64I-NEXT: srl a5, a4, a5 ; RV64I-NEXT: bnez a1, .LBB7_3 ; RV64I-NEXT: j .LBB7_4 ; RV64I-NEXT: .LBB7_2: -; RV64I-NEXT: srl a5, a0, a1 -; RV64I-NEXT: neg a6, a1 -; RV64I-NEXT: sll a6, a3, a6 -; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: neg a5, a1 +; RV64I-NEXT: srl a6, a0, a1 +; RV64I-NEXT: sll a5, a4, a5 +; RV64I-NEXT: or a5, a6, a5 ; RV64I-NEXT: beqz a1, .LBB7_4 ; RV64I-NEXT: .LBB7_3: ; RV64I-NEXT: mv a0, a5 ; RV64I-NEXT: .LBB7_4: -; RV64I-NEXT: bltu a1, a4, .LBB7_6 +; RV64I-NEXT: bltu a1, a3, .LBB7_6 ; RV64I-NEXT: # %bb.5: ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: j .LBB7_7 ; RV64I-NEXT: .LBB7_6: -; RV64I-NEXT: srl a1, a3, a1 +; RV64I-NEXT: srl a1, a4, a1 ; RV64I-NEXT: .LBB7_7: ; RV64I-NEXT: srli a3, a0, 32 -; RV64I-NEXT: srliw a4, a0, 16 -; RV64I-NEXT: lui a5, 16 +; RV64I-NEXT: lui a4, 16 +; RV64I-NEXT: srliw a5, a0, 16 +; RV64I-NEXT: addi a4, a4, -1 ; RV64I-NEXT: srliw a6, a0, 24 -; RV64I-NEXT: srli a7, a0, 48 
-; RV64I-NEXT: srli t0, a0, 56 -; RV64I-NEXT: srli t1, a1, 32 -; RV64I-NEXT: srliw t2, a1, 16 -; RV64I-NEXT: srliw t3, a1, 24 -; RV64I-NEXT: srli t4, a1, 48 -; RV64I-NEXT: srli t5, a1, 56 -; RV64I-NEXT: addi a5, a5, -1 -; RV64I-NEXT: and t6, a0, a5 -; RV64I-NEXT: srli t6, t6, 8 +; RV64I-NEXT: and a7, a0, a4 +; RV64I-NEXT: srli a7, a7, 8 ; RV64I-NEXT: sb a0, 0(a2) -; RV64I-NEXT: sb t6, 1(a2) -; RV64I-NEXT: sb a4, 2(a2) +; RV64I-NEXT: sb a7, 1(a2) +; RV64I-NEXT: sb a5, 2(a2) ; RV64I-NEXT: sb a6, 3(a2) -; RV64I-NEXT: and a0, a3, a5 -; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: and a5, a3, a4 +; RV64I-NEXT: srli a6, a0, 48 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: sb a3, 4(a2) -; RV64I-NEXT: sb a0, 5(a2) -; RV64I-NEXT: sb a7, 6(a2) -; RV64I-NEXT: sb t0, 7(a2) -; RV64I-NEXT: and a0, a1, a5 -; RV64I-NEXT: and a3, t1, a5 -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: sb a5, 5(a2) +; RV64I-NEXT: sb a6, 6(a2) +; RV64I-NEXT: sb a0, 7(a2) +; RV64I-NEXT: srli a0, a1, 32 +; RV64I-NEXT: srliw a3, a1, 16 +; RV64I-NEXT: and a5, a1, a4 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srliw a6, a1, 24 ; RV64I-NEXT: sb a1, 8(a2) -; RV64I-NEXT: sb a0, 9(a2) -; RV64I-NEXT: sb t2, 10(a2) -; RV64I-NEXT: sb t3, 11(a2) -; RV64I-NEXT: sb t1, 12(a2) -; RV64I-NEXT: sb a3, 13(a2) -; RV64I-NEXT: sb t4, 14(a2) -; RV64I-NEXT: sb t5, 15(a2) -; RV64I-NEXT: ld s0, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: sb a5, 9(a2) +; RV64I-NEXT: sb a3, 10(a2) +; RV64I-NEXT: sb a6, 11(a2) +; RV64I-NEXT: srli a3, a1, 48 +; RV64I-NEXT: and a4, a0, a4 +; RV64I-NEXT: srli a4, a4, 8 +; RV64I-NEXT: srli a1, a1, 56 +; RV64I-NEXT: sb a0, 12(a2) +; RV64I-NEXT: sb a4, 13(a2) +; RV64I-NEXT: sb a3, 14(a2) +; RV64I-NEXT: sb a1, 15(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: lshr_16bytes_wordOff: @@ -1157,189 +1149,189 @@ define void @lshr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded 
Spill ; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: lbu a5, 1(a0) -; RV32I-NEXT: lbu a6, 2(a0) +; RV32I-NEXT: lbu a5, 0(a0) +; RV32I-NEXT: lbu a4, 1(a0) +; RV32I-NEXT: lbu a3, 2(a0) ; RV32I-NEXT: lbu a7, 3(a0) -; RV32I-NEXT: lbu a4, 4(a0) +; RV32I-NEXT: lbu a6, 4(a0) ; RV32I-NEXT: lbu t0, 5(a0) -; RV32I-NEXT: lbu t1, 6(a0) -; RV32I-NEXT: lbu t2, 7(a0) -; RV32I-NEXT: lbu t4, 8(a0) +; RV32I-NEXT: lbu t1, 7(a0) +; RV32I-NEXT: lbu t2, 6(a0) +; RV32I-NEXT: lbu t4, 11(a0) ; RV32I-NEXT: lbu t5, 9(a0) ; RV32I-NEXT: lbu t6, 10(a0) -; RV32I-NEXT: lbu s0, 11(a0) +; RV32I-NEXT: lbu s0, 8(a0) ; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t3, a7, a6 -; RV32I-NEXT: or t1, t2, t1 -; RV32I-NEXT: lbu a6, 12(a0) -; RV32I-NEXT: lbu a7, 13(a0) -; RV32I-NEXT: lbu t2, 14(a0) -; RV32I-NEXT: lbu a0, 15(a0) +; RV32I-NEXT: or t3, a7, a3 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or t1, t1, t2 +; RV32I-NEXT: slli t4, t4, 8 ; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or t4, t5, t4 -; RV32I-NEXT: or t5, s0, t6 -; RV32I-NEXT: or t6, a7, a6 -; RV32I-NEXT: lbu a6, 0(a1) -; RV32I-NEXT: lbu a7, 1(a1) -; RV32I-NEXT: lbu s0, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: or a3, t4, t6 +; RV32I-NEXT: or a7, t5, s0 +; RV32I-NEXT: slli a3, a3, 16 +; RV32I-NEXT: lbu t2, 12(a0) +; RV32I-NEXT: lbu t4, 13(a0) +; RV32I-NEXT: lbu t5, 14(a0) +; RV32I-NEXT: lbu a0, 15(a0) +; RV32I-NEXT: lbu t6, 0(a1) +; RV32I-NEXT: lbu s0, 1(a1) +; RV32I-NEXT: lbu s1, 2(a1) +; RV32I-NEXT: lbu s2, 3(a1) +; RV32I-NEXT: or a3, a3, a7 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: or a7, t4, t2 ; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or t2, a0, t2 -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or s1, a7, a6 -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or s0, a1, s0 -; RV32I-NEXT: li a7, 32 -; RV32I-NEXT: slli a1, a5, 8 +; RV32I-NEXT: or t2, a0, 
t5 +; RV32I-NEXT: slli a1, a4, 8 ; RV32I-NEXT: slli a0, t0, 8 -; RV32I-NEXT: slli t5, t5, 16 +; RV32I-NEXT: slli s2, s2, 8 +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: or a4, s2, s1 +; RV32I-NEXT: or t0, s0, t6 +; RV32I-NEXT: slli a4, a4, 16 ; RV32I-NEXT: slli t2, t2, 16 -; RV32I-NEXT: slli s0, s0, 16 -; RV32I-NEXT: or a6, t5, t4 -; RV32I-NEXT: or t0, t2, t6 -; RV32I-NEXT: or a5, s0, s1 -; RV32I-NEXT: slli a5, a5, 5 -; RV32I-NEXT: srl t2, a6, a5 -; RV32I-NEXT: neg t5, a5 -; RV32I-NEXT: sll t4, t0, t5 -; RV32I-NEXT: bltu a5, a7, .LBB7_2 +; RV32I-NEXT: or a4, a4, t0 +; RV32I-NEXT: or a7, t2, a7 +; RV32I-NEXT: slli a4, a4, 5 +; RV32I-NEXT: li t0, 32 +; RV32I-NEXT: neg t4, a4 +; RV32I-NEXT: srl t2, a3, a4 +; RV32I-NEXT: sll t5, a7, t4 +; RV32I-NEXT: bltu a4, t0, .LBB7_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srl t6, t0, a5 +; RV32I-NEXT: srl t6, a7, a4 ; RV32I-NEXT: j .LBB7_3 ; RV32I-NEXT: .LBB7_2: -; RV32I-NEXT: or t6, t2, t4 +; RV32I-NEXT: or t6, t2, t5 ; RV32I-NEXT: .LBB7_3: -; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a1, a1, a5 ; RV32I-NEXT: slli t3, t3, 16 -; RV32I-NEXT: or a3, a0, a4 +; RV32I-NEXT: or a5, a0, a6 ; RV32I-NEXT: slli t1, t1, 16 -; RV32I-NEXT: mv a0, a6 -; RV32I-NEXT: beqz a5, .LBB7_5 +; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: beqz a4, .LBB7_5 ; RV32I-NEXT: # %bb.4: ; RV32I-NEXT: mv a0, t6 ; RV32I-NEXT: .LBB7_5: -; RV32I-NEXT: or a4, t3, a1 -; RV32I-NEXT: or a3, t1, a3 -; RV32I-NEXT: bltu a5, a7, .LBB7_7 +; RV32I-NEXT: or a6, t3, a1 +; RV32I-NEXT: or a5, t1, a5 +; RV32I-NEXT: bltu a4, t0, .LBB7_7 ; RV32I-NEXT: # %bb.6: ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: srl t6, a3, a5 +; RV32I-NEXT: srl t6, a5, a4 ; RV32I-NEXT: j .LBB7_8 ; RV32I-NEXT: .LBB7_7: -; RV32I-NEXT: srl a1, t0, a5 -; RV32I-NEXT: srl t1, a4, a5 -; RV32I-NEXT: sll t3, a3, t5 +; RV32I-NEXT: srl t1, a6, a4 +; RV32I-NEXT: sll t3, a5, t4 +; RV32I-NEXT: srl a1, a7, a4 ; RV32I-NEXT: or t6, t1, t3 ; RV32I-NEXT: .LBB7_8: ; RV32I-NEXT: li t1, 64 -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: beqz a5, 
.LBB7_10 +; RV32I-NEXT: mv t3, a6 +; RV32I-NEXT: beqz a4, .LBB7_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: mv t3, t6 ; RV32I-NEXT: .LBB7_10: -; RV32I-NEXT: sub s0, t1, a5 -; RV32I-NEXT: bltu a5, a7, .LBB7_13 +; RV32I-NEXT: sub s0, t1, a4 +; RV32I-NEXT: bltu a4, t0, .LBB7_13 ; RV32I-NEXT: # %bb.11: ; RV32I-NEXT: li t6, 0 -; RV32I-NEXT: bgeu s0, a7, .LBB7_14 +; RV32I-NEXT: bgeu s0, t0, .LBB7_14 ; RV32I-NEXT: .LBB7_12: -; RV32I-NEXT: sll t5, a6, t5 ; RV32I-NEXT: neg s1, s0 -; RV32I-NEXT: srl s1, a6, s1 -; RV32I-NEXT: or s2, s1, t4 +; RV32I-NEXT: srl s1, a3, s1 +; RV32I-NEXT: sll t4, a3, t4 +; RV32I-NEXT: or s2, s1, t5 ; RV32I-NEXT: j .LBB7_15 ; RV32I-NEXT: .LBB7_13: -; RV32I-NEXT: srl t6, a3, a5 -; RV32I-NEXT: bltu s0, a7, .LBB7_12 +; RV32I-NEXT: srl t6, a5, a4 +; RV32I-NEXT: bltu s0, t0, .LBB7_12 ; RV32I-NEXT: .LBB7_14: -; RV32I-NEXT: li t5, 0 -; RV32I-NEXT: sll s2, a6, s0 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: sll s2, a3, s0 ; RV32I-NEXT: .LBB7_15: -; RV32I-NEXT: addi s1, a5, -64 -; RV32I-NEXT: mv t4, t0 +; RV32I-NEXT: addi s1, a4, -64 +; RV32I-NEXT: mv t5, a7 ; RV32I-NEXT: beqz s0, .LBB7_17 ; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: mv t5, s2 ; RV32I-NEXT: .LBB7_17: -; RV32I-NEXT: bltu s1, a7, .LBB7_19 +; RV32I-NEXT: bltu s1, t0, .LBB7_19 ; RV32I-NEXT: # %bb.18: -; RV32I-NEXT: srl t2, t0, s1 +; RV32I-NEXT: srl t2, a7, s1 ; RV32I-NEXT: bnez s1, .LBB7_20 ; RV32I-NEXT: j .LBB7_21 ; RV32I-NEXT: .LBB7_19: ; RV32I-NEXT: neg s0, s1 -; RV32I-NEXT: sll s0, t0, s0 +; RV32I-NEXT: sll s0, a7, s0 ; RV32I-NEXT: or t2, t2, s0 ; RV32I-NEXT: beqz s1, .LBB7_21 ; RV32I-NEXT: .LBB7_20: -; RV32I-NEXT: mv a6, t2 +; RV32I-NEXT: mv a3, t2 ; RV32I-NEXT: .LBB7_21: -; RV32I-NEXT: bltu s1, a7, .LBB7_23 +; RV32I-NEXT: bltu s1, t0, .LBB7_23 ; RV32I-NEXT: # %bb.22: ; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: bltu a5, t1, .LBB7_24 +; RV32I-NEXT: bltu a4, t1, .LBB7_24 ; RV32I-NEXT: j .LBB7_25 ; RV32I-NEXT: .LBB7_23: -; RV32I-NEXT: srl a7, t0, a5 -; RV32I-NEXT: bgeu a5, t1, .LBB7_25 +; 
RV32I-NEXT: srl a7, a7, a4 +; RV32I-NEXT: bgeu a4, t1, .LBB7_25 ; RV32I-NEXT: .LBB7_24: -; RV32I-NEXT: or a6, t3, t5 -; RV32I-NEXT: or a7, t6, t4 +; RV32I-NEXT: or a3, t3, t4 +; RV32I-NEXT: or a7, t6, t5 ; RV32I-NEXT: .LBB7_25: -; RV32I-NEXT: bnez a5, .LBB7_29 +; RV32I-NEXT: bnez a4, .LBB7_29 ; RV32I-NEXT: # %bb.26: -; RV32I-NEXT: bltu a5, t1, .LBB7_28 +; RV32I-NEXT: bltu a4, t1, .LBB7_28 ; RV32I-NEXT: .LBB7_27: ; RV32I-NEXT: li a0, 0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: .LBB7_28: -; RV32I-NEXT: srli a5, a4, 16 -; RV32I-NEXT: lui a6, 16 -; RV32I-NEXT: srli a7, a4, 24 -; RV32I-NEXT: srli t0, a3, 16 -; RV32I-NEXT: srli t1, a3, 24 -; RV32I-NEXT: srli t2, a0, 16 -; RV32I-NEXT: srli t3, a0, 24 -; RV32I-NEXT: srli t4, a1, 16 -; RV32I-NEXT: srli t5, a1, 24 -; RV32I-NEXT: addi a6, a6, -1 -; RV32I-NEXT: and t6, a4, a6 -; RV32I-NEXT: srli t6, t6, 8 -; RV32I-NEXT: sb a4, 0(a2) -; RV32I-NEXT: sb t6, 1(a2) -; RV32I-NEXT: sb a5, 2(a2) -; RV32I-NEXT: sb a7, 3(a2) -; RV32I-NEXT: and a4, a3, a6 -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a3, 4(a2) -; RV32I-NEXT: sb a4, 5(a2) -; RV32I-NEXT: sb t0, 6(a2) -; RV32I-NEXT: sb t1, 7(a2) -; RV32I-NEXT: and a3, a0, a6 -; RV32I-NEXT: and a4, a1, a6 -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: lui a3, 16 +; RV32I-NEXT: addi a3, a3, -1 +; RV32I-NEXT: srli a4, a6, 16 +; RV32I-NEXT: and a7, a6, a3 +; RV32I-NEXT: srli t0, a6, 24 +; RV32I-NEXT: srli a7, a7, 8 +; RV32I-NEXT: sb a6, 0(a2) +; RV32I-NEXT: sb a7, 1(a2) +; RV32I-NEXT: sb a4, 2(a2) +; RV32I-NEXT: sb t0, 3(a2) +; RV32I-NEXT: srli a4, a5, 16 +; RV32I-NEXT: and a6, a5, a3 +; RV32I-NEXT: srli a6, a6, 8 +; RV32I-NEXT: srli a7, a5, 24 +; RV32I-NEXT: sb a5, 4(a2) +; RV32I-NEXT: sb a6, 5(a2) +; RV32I-NEXT: sb a4, 6(a2) +; RV32I-NEXT: sb a7, 7(a2) +; RV32I-NEXT: srli a4, a0, 16 +; RV32I-NEXT: and a5, a0, a3 +; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: srli a6, a0, 24 ; RV32I-NEXT: sb a0, 8(a2) -; RV32I-NEXT: sb a3, 9(a2) -; RV32I-NEXT: sb t2, 10(a2) -; 
RV32I-NEXT: sb t3, 11(a2) +; RV32I-NEXT: sb a5, 9(a2) +; RV32I-NEXT: sb a4, 10(a2) +; RV32I-NEXT: sb a6, 11(a2) +; RV32I-NEXT: srli a0, a1, 16 +; RV32I-NEXT: and a3, a1, a3 +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: srli a4, a1, 24 ; RV32I-NEXT: sb a1, 12(a2) -; RV32I-NEXT: sb a4, 13(a2) -; RV32I-NEXT: sb t4, 14(a2) -; RV32I-NEXT: sb t5, 15(a2) +; RV32I-NEXT: sb a3, 13(a2) +; RV32I-NEXT: sb a0, 14(a2) +; RV32I-NEXT: sb a4, 15(a2) ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB7_29: -; RV32I-NEXT: mv a4, a6 -; RV32I-NEXT: mv a3, a7 -; RV32I-NEXT: bgeu a5, t1, .LBB7_27 +; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: mv a5, a7 +; RV32I-NEXT: bgeu a4, t1, .LBB7_27 ; RV32I-NEXT: j .LBB7_28 %src = load i128, ptr %src.ptr, align 1 %wordOff = load i128, ptr %wordOff.ptr, align 1 @@ -1352,76 +1344,74 @@ define void @lshr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: shl_16bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd s0, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: lbu a4, 1(a0) -; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: lbu a6, 3(a0) -; RV64I-NEXT: lbu a7, 4(a0) -; RV64I-NEXT: lbu t0, 5(a0) -; RV64I-NEXT: lbu t1, 6(a0) -; RV64I-NEXT: lbu t2, 7(a0) -; RV64I-NEXT: lbu t3, 8(a0) -; RV64I-NEXT: lbu t4, 9(a0) -; RV64I-NEXT: lbu t5, 10(a0) -; RV64I-NEXT: lbu t6, 11(a0) +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 3(a0) +; RV64I-NEXT: lbu a5, 7(a0) +; RV64I-NEXT: lbu a6, 2(a0) +; RV64I-NEXT: lbu a7, 0(a0) +; RV64I-NEXT: lbu t0, 4(a0) +; RV64I-NEXT: lbu t1, 5(a0) +; RV64I-NEXT: lbu t2, 6(a0) +; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: or a4, a6, a5 -; 
RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu a6, 13(a0) -; RV64I-NEXT: lbu s0, 14(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a4, a6 +; RV64I-NEXT: or a3, a3, a7 +; RV64I-NEXT: slli a4, a4, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or a5, a5, t2 +; RV64I-NEXT: or a6, t1, t0 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: lbu a7, 8(a0) +; RV64I-NEXT: lbu t0, 9(a0) +; RV64I-NEXT: lbu t1, 10(a0) +; RV64I-NEXT: lbu t2, 11(a0) +; RV64I-NEXT: lbu t3, 12(a0) +; RV64I-NEXT: lbu t4, 13(a0) +; RV64I-NEXT: lbu t5, 14(a0) ; RV64I-NEXT: lbu a0, 15(a0) +; RV64I-NEXT: or a4, a4, a3 +; RV64I-NEXT: or a3, a5, a6 ; RV64I-NEXT: slli t0, t0, 8 ; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: or t0, t2, t1 -; RV64I-NEXT: or t1, t4, t3 -; RV64I-NEXT: or t2, t6, t5 -; RV64I-NEXT: lbu t3, 0(a1) -; RV64I-NEXT: lbu t4, 1(a1) -; RV64I-NEXT: lbu t5, 2(a1) -; RV64I-NEXT: lbu t6, 3(a1) -; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, t0, a7 +; RV64I-NEXT: or a6, t2, t1 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli a0, a0, 8 ; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a0, a0, s0 -; RV64I-NEXT: or a6, t4, t3 -; RV64I-NEXT: lbu t3, 4(a1) -; RV64I-NEXT: lbu t4, 5(a1) -; RV64I-NEXT: lbu s0, 6(a1) +; RV64I-NEXT: or a0, a0, t5 +; RV64I-NEXT: or a7, t4, t3 +; RV64I-NEXT: slli a0, a0, 16 +; RV64I-NEXT: lbu t0, 0(a1) +; RV64I-NEXT: lbu t1, 1(a1) +; RV64I-NEXT: lbu t2, 2(a1) +; RV64I-NEXT: lbu t3, 3(a1) +; RV64I-NEXT: lbu t4, 4(a1) +; RV64I-NEXT: lbu t5, 5(a1) +; RV64I-NEXT: lbu t6, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: or t5, t6, t5 -; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or t3, t4, t3 +; RV64I-NEXT: or a6, a6, a5 +; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: slli t3, t3, 8 +; RV64I-NEXT: or a5, t1, t0 +; RV64I-NEXT: or a7, t3, t2 +; RV64I-NEXT: slli t5, t5, 8 ; RV64I-NEXT: slli a1, a1, 8 -; 
RV64I-NEXT: or a1, a1, s0 -; RV64I-NEXT: slli a4, a4, 16 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: slli t0, t0, 16 -; RV64I-NEXT: or a4, t0, a7 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: or a7, t2, t1 -; RV64I-NEXT: slli a0, a0, 16 -; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: slli t5, t5, 16 -; RV64I-NEXT: or a5, t5, a6 +; RV64I-NEXT: or t0, t5, t4 +; RV64I-NEXT: or a1, a1, t6 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: or a1, a1, t3 -; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: or a5, a7, a5 +; RV64I-NEXT: or a1, a1, t0 +; RV64I-NEXT: slli a7, a3, 32 ; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: or a4, a4, a3 +; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a1, a1, a5 ; RV64I-NEXT: slli a3, a1, 3 ; RV64I-NEXT: li a5, 64 -; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: bltu a3, a5, .LBB8_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: li a1, 0 @@ -1430,101 +1420,99 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: bnez a3, .LBB8_3 ; RV64I-NEXT: j .LBB8_4 ; RV64I-NEXT: .LBB8_2: +; RV64I-NEXT: neg a1, a3 +; RV64I-NEXT: srl a5, a4, a1 +; RV64I-NEXT: sll a6, a0, a3 ; RV64I-NEXT: sll a1, a4, a3 -; RV64I-NEXT: neg a5, a3 -; RV64I-NEXT: srl a4, a4, a5 -; RV64I-NEXT: sll a5, a0, a3 -; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: or a4, a5, a6 ; RV64I-NEXT: beqz a3, .LBB8_4 ; RV64I-NEXT: .LBB8_3: ; RV64I-NEXT: mv a0, a4 ; RV64I-NEXT: .LBB8_4: ; RV64I-NEXT: srli a3, a1, 32 -; RV64I-NEXT: srliw a4, a1, 16 -; RV64I-NEXT: lui a5, 16 +; RV64I-NEXT: lui a4, 16 +; RV64I-NEXT: srliw a5, a1, 16 +; RV64I-NEXT: addi a4, a4, -1 ; RV64I-NEXT: srliw a6, a1, 24 -; RV64I-NEXT: srli a7, a1, 48 -; RV64I-NEXT: srli t0, a1, 56 -; RV64I-NEXT: srli t1, a0, 32 -; RV64I-NEXT: srliw t2, a0, 16 -; RV64I-NEXT: srliw t3, a0, 24 -; RV64I-NEXT: srli t4, a0, 48 -; RV64I-NEXT: srli t5, a0, 56 -; RV64I-NEXT: addi a5, a5, -1 -; RV64I-NEXT: and t6, a1, a5 -; 
RV64I-NEXT: srli t6, t6, 8 +; RV64I-NEXT: and a7, a1, a4 +; RV64I-NEXT: srli a7, a7, 8 ; RV64I-NEXT: sb a1, 0(a2) -; RV64I-NEXT: sb t6, 1(a2) -; RV64I-NEXT: sb a4, 2(a2) +; RV64I-NEXT: sb a7, 1(a2) +; RV64I-NEXT: sb a5, 2(a2) ; RV64I-NEXT: sb a6, 3(a2) -; RV64I-NEXT: and a1, a3, a5 -; RV64I-NEXT: srli a1, a1, 8 +; RV64I-NEXT: and a5, a3, a4 +; RV64I-NEXT: srli a6, a1, 48 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srli a1, a1, 56 ; RV64I-NEXT: sb a3, 4(a2) -; RV64I-NEXT: sb a1, 5(a2) -; RV64I-NEXT: sb a7, 6(a2) -; RV64I-NEXT: sb t0, 7(a2) -; RV64I-NEXT: and a1, a0, a5 -; RV64I-NEXT: and a3, t1, a5 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: sb a5, 5(a2) +; RV64I-NEXT: sb a6, 6(a2) +; RV64I-NEXT: sb a1, 7(a2) +; RV64I-NEXT: srli a1, a0, 32 +; RV64I-NEXT: srliw a3, a0, 16 +; RV64I-NEXT: and a5, a0, a4 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srliw a6, a0, 24 ; RV64I-NEXT: sb a0, 8(a2) -; RV64I-NEXT: sb a1, 9(a2) -; RV64I-NEXT: sb t2, 10(a2) -; RV64I-NEXT: sb t3, 11(a2) -; RV64I-NEXT: sb t1, 12(a2) -; RV64I-NEXT: sb a3, 13(a2) -; RV64I-NEXT: sb t4, 14(a2) -; RV64I-NEXT: sb t5, 15(a2) -; RV64I-NEXT: ld s0, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: sb a5, 9(a2) +; RV64I-NEXT: sb a3, 10(a2) +; RV64I-NEXT: sb a6, 11(a2) +; RV64I-NEXT: srli a3, a0, 48 +; RV64I-NEXT: and a4, a1, a4 +; RV64I-NEXT: srli a4, a4, 8 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: sb a1, 12(a2) +; RV64I-NEXT: sb a4, 13(a2) +; RV64I-NEXT: sb a3, 14(a2) +; RV64I-NEXT: sb a0, 15(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: shl_16bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: lbu a4, 1(a0) -; RV32I-NEXT: lbu a5, 2(a0) -; RV32I-NEXT: lbu a6, 3(a0) -; RV32I-NEXT: lbu a7, 4(a0) -; RV32I-NEXT: lbu t0, 5(a0) -; RV32I-NEXT: lbu t1, 6(a0) -; RV32I-NEXT: lbu t2, 7(a0) +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 3(a0) +; RV32I-NEXT: lbu a5, 0(a0) +; RV32I-NEXT: lbu a6, 2(a0) +; RV32I-NEXT: lbu t0, 4(a0) +; RV32I-NEXT: 
lbu t1, 5(a0) +; RV32I-NEXT: lbu t2, 6(a0) +; RV32I-NEXT: lbu t3, 7(a0) +; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a5, t0, a7 -; RV32I-NEXT: or a7, t2, t1 +; RV32I-NEXT: or a3, a3, a5 +; RV32I-NEXT: or a4, a4, a6 +; RV32I-NEXT: slli a4, a4, 16 +; RV32I-NEXT: lbu a5, 0(a1) ; RV32I-NEXT: lbu a6, 1(a1) -; RV32I-NEXT: lbu t0, 0(a1) -; RV32I-NEXT: lbu t1, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or t2, a6, t0 -; RV32I-NEXT: li a6, 64 -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a1, t1 -; RV32I-NEXT: li t1, 32 +; RV32I-NEXT: lbu t4, 3(a1) +; RV32I-NEXT: lbu a1, 2(a1) +; RV32I-NEXT: or a7, a4, a3 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or a3, t1, t0 +; RV32I-NEXT: or a4, t3, t2 ; RV32I-NEXT: slli a4, a4, 16 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a1, t4, a1 +; RV32I-NEXT: or a5, a6, a5 ; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: or t0, a4, a3 -; RV32I-NEXT: or a4, a7, a5 -; RV32I-NEXT: or a5, a1, t2 -; RV32I-NEXT: slli a5, a5, 3 -; RV32I-NEXT: neg t3, a5 -; RV32I-NEXT: srl t4, t0, t3 -; RV32I-NEXT: sll t2, a4, a5 -; RV32I-NEXT: bltu a5, t1, .LBB8_2 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a1, a1, a5 +; RV32I-NEXT: slli a4, a1, 3 +; RV32I-NEXT: li a6, 64 +; RV32I-NEXT: li t2, 32 +; RV32I-NEXT: neg t3, a4 +; RV32I-NEXT: srl t4, a7, t3 +; RV32I-NEXT: sll t1, a3, a4 +; RV32I-NEXT: bltu a4, t2, .LBB8_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: sll t5, t0, a5 +; RV32I-NEXT: sll t5, a7, a4 ; RV32I-NEXT: j .LBB8_3 ; RV32I-NEXT: .LBB8_2: -; RV32I-NEXT: sll a1, t0, a5 -; RV32I-NEXT: or t5, t4, t2 +; RV32I-NEXT: sll a1, a7, a4 +; RV32I-NEXT: or t5, t4, t1 ; RV32I-NEXT: .LBB8_3: ; RV32I-NEXT: addi sp, sp, -32 ; RV32I-NEXT: sw s0, 
28(sp) # 4-byte Folded Spill @@ -1533,85 +1521,84 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sw s3, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: lbu s3, 11(a0) -; RV32I-NEXT: lbu s1, 15(a0) -; RV32I-NEXT: sub a7, a6, a5 -; RV32I-NEXT: mv a3, a4 -; RV32I-NEXT: beqz a5, .LBB8_5 +; RV32I-NEXT: lbu s2, 15(a0) +; RV32I-NEXT: sub t0, a6, a4 +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: beqz a4, .LBB8_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: mv a5, t5 ; RV32I-NEXT: .LBB8_5: -; RV32I-NEXT: lbu s2, 9(a0) +; RV32I-NEXT: lbu t6, 9(a0) ; RV32I-NEXT: lbu t5, 10(a0) -; RV32I-NEXT: lbu s0, 13(a0) -; RV32I-NEXT: lbu t6, 14(a0) +; RV32I-NEXT: lbu s1, 13(a0) +; RV32I-NEXT: lbu s0, 14(a0) ; RV32I-NEXT: slli s3, s3, 8 -; RV32I-NEXT: slli s1, s1, 8 -; RV32I-NEXT: bltu a7, t1, .LBB8_7 +; RV32I-NEXT: slli s2, s2, 8 +; RV32I-NEXT: bltu t0, t2, .LBB8_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: srl s4, a4, a7 +; RV32I-NEXT: srl s4, a3, t0 ; RV32I-NEXT: j .LBB8_8 ; RV32I-NEXT: .LBB8_7: -; RV32I-NEXT: neg s4, a7 -; RV32I-NEXT: sll s4, a4, s4 +; RV32I-NEXT: neg s4, t0 +; RV32I-NEXT: sll s4, a3, s4 ; RV32I-NEXT: or s4, t4, s4 ; RV32I-NEXT: .LBB8_8: -; RV32I-NEXT: slli s2, s2, 8 -; RV32I-NEXT: lbu s6, 8(a0) -; RV32I-NEXT: lbu s5, 12(a0) +; RV32I-NEXT: lbu s5, 8(a0) +; RV32I-NEXT: lbu a0, 12(a0) +; RV32I-NEXT: slli t6, t6, 8 ; RV32I-NEXT: or s3, s3, t5 -; RV32I-NEXT: slli t5, s0, 8 -; RV32I-NEXT: or s1, s1, t6 -; RV32I-NEXT: mv t4, t0 -; RV32I-NEXT: beqz a7, .LBB8_10 +; RV32I-NEXT: slli t5, s1, 8 +; RV32I-NEXT: or s1, s2, s0 +; RV32I-NEXT: mv t4, a7 +; RV32I-NEXT: beqz t0, .LBB8_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: mv t4, s4 ; RV32I-NEXT: .LBB8_10: -; RV32I-NEXT: or a0, s2, s6 +; RV32I-NEXT: or t6, t6, s5 ; RV32I-NEXT: slli s0, s3, 16 -; RV32I-NEXT: or t6, t5, s5 +; RV32I-NEXT: or a0, 
t5, a0 ; RV32I-NEXT: slli s1, s1, 16 -; RV32I-NEXT: bltu a7, t1, .LBB8_12 +; RV32I-NEXT: bltu t0, t2, .LBB8_12 ; RV32I-NEXT: # %bb.11: ; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: j .LBB8_13 ; RV32I-NEXT: .LBB8_12: -; RV32I-NEXT: srl t5, a4, t3 +; RV32I-NEXT: srl t5, a3, t3 ; RV32I-NEXT: .LBB8_13: -; RV32I-NEXT: or a7, s0, a0 -; RV32I-NEXT: or a0, s1, t6 -; RV32I-NEXT: bltu a5, t1, .LBB8_15 +; RV32I-NEXT: or t0, s0, t6 +; RV32I-NEXT: or a0, s1, a0 +; RV32I-NEXT: bltu a4, t2, .LBB8_15 ; RV32I-NEXT: # %bb.14: -; RV32I-NEXT: li t6, 0 -; RV32I-NEXT: sll s1, a7, a5 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: sll s1, t0, a4 ; RV32I-NEXT: j .LBB8_16 ; RV32I-NEXT: .LBB8_15: -; RV32I-NEXT: sll t6, a7, a5 -; RV32I-NEXT: srl t3, a7, t3 -; RV32I-NEXT: sll s0, a0, a5 -; RV32I-NEXT: or s1, t3, s0 +; RV32I-NEXT: srl t6, t0, t3 +; RV32I-NEXT: sll s0, a0, a4 +; RV32I-NEXT: sll t3, t0, a4 +; RV32I-NEXT: or s1, t6, s0 ; RV32I-NEXT: .LBB8_16: -; RV32I-NEXT: addi s0, a5, -64 -; RV32I-NEXT: mv t3, a0 -; RV32I-NEXT: beqz a5, .LBB8_18 +; RV32I-NEXT: addi s0, a4, -64 +; RV32I-NEXT: mv t6, a0 +; RV32I-NEXT: beqz a4, .LBB8_18 ; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: mv t6, s1 ; RV32I-NEXT: .LBB8_18: -; RV32I-NEXT: bltu s0, t1, .LBB8_20 +; RV32I-NEXT: bltu s0, t2, .LBB8_20 ; RV32I-NEXT: # %bb.19: -; RV32I-NEXT: li t1, 0 -; RV32I-NEXT: sll t0, t0, s0 +; RV32I-NEXT: li t2, 0 +; RV32I-NEXT: sll a7, a7, s0 ; RV32I-NEXT: bnez s0, .LBB8_21 ; RV32I-NEXT: j .LBB8_22 ; RV32I-NEXT: .LBB8_20: -; RV32I-NEXT: sll t1, t0, a5 -; RV32I-NEXT: neg s1, s0 -; RV32I-NEXT: srl t0, t0, s1 -; RV32I-NEXT: or t0, t0, t2 +; RV32I-NEXT: neg t2, s0 +; RV32I-NEXT: srl s1, a7, t2 +; RV32I-NEXT: sll t2, a7, a4 +; RV32I-NEXT: or a7, s1, t1 ; RV32I-NEXT: beqz s0, .LBB8_22 ; RV32I-NEXT: .LBB8_21: -; RV32I-NEXT: mv a4, t0 +; RV32I-NEXT: mv a3, a7 ; RV32I-NEXT: .LBB8_22: ; RV32I-NEXT: lw s0, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 24(sp) # 4-byte Folded Reload @@ -1619,56 +1606,55 @@ define void 
@shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: lw s3, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s4, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s5, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 -; RV32I-NEXT: bltu a5, a6, .LBB8_24 +; RV32I-NEXT: bltu a4, a6, .LBB8_24 ; RV32I-NEXT: # %bb.23: ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez a5, .LBB8_25 +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: bnez a4, .LBB8_25 ; RV32I-NEXT: j .LBB8_26 ; RV32I-NEXT: .LBB8_24: -; RV32I-NEXT: or t1, t4, t6 -; RV32I-NEXT: or a4, t5, t3 -; RV32I-NEXT: beqz a5, .LBB8_26 +; RV32I-NEXT: or t2, t4, t3 +; RV32I-NEXT: or a3, t5, t6 +; RV32I-NEXT: beqz a4, .LBB8_26 ; RV32I-NEXT: .LBB8_25: -; RV32I-NEXT: mv a7, t1 -; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv t0, t2 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: .LBB8_26: +; RV32I-NEXT: lui a3, 16 +; RV32I-NEXT: addi a3, a3, -1 ; RV32I-NEXT: srli a4, a1, 16 -; RV32I-NEXT: lui a5, 16 -; RV32I-NEXT: srli a6, a1, 24 -; RV32I-NEXT: srli t0, a3, 16 -; RV32I-NEXT: srli t1, a3, 24 -; RV32I-NEXT: srli t2, a7, 16 -; RV32I-NEXT: srli t3, a7, 24 -; RV32I-NEXT: srli t4, a0, 16 -; RV32I-NEXT: srli t5, a0, 24 -; RV32I-NEXT: addi a5, a5, -1 -; RV32I-NEXT: and t6, a1, a5 -; RV32I-NEXT: srli t6, t6, 8 +; RV32I-NEXT: and a6, a1, a3 +; RV32I-NEXT: srli a7, a1, 24 +; RV32I-NEXT: srli a6, a6, 8 ; RV32I-NEXT: sb a1, 0(a2) -; RV32I-NEXT: sb t6, 1(a2) +; RV32I-NEXT: sb a6, 1(a2) ; RV32I-NEXT: sb a4, 2(a2) -; RV32I-NEXT: sb a6, 3(a2) -; RV32I-NEXT: and a1, a3, a5 -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a3, 4(a2) -; RV32I-NEXT: sb a1, 5(a2) -; RV32I-NEXT: sb t0, 6(a2) -; RV32I-NEXT: sb t1, 7(a2) -; RV32I-NEXT: and a1, a7, a5 -; RV32I-NEXT: and a5, a0, a5 -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a7, 8(a2) -; RV32I-NEXT: sb a1, 9(a2) -; RV32I-NEXT: sb t2, 10(a2) -; RV32I-NEXT: sb t3, 11(a2) +; RV32I-NEXT: sb a7, 3(a2) +; 
RV32I-NEXT: srli a1, a5, 16 +; RV32I-NEXT: and a4, a5, a3 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a6, a5, 24 +; RV32I-NEXT: sb a5, 4(a2) +; RV32I-NEXT: sb a4, 5(a2) +; RV32I-NEXT: sb a1, 6(a2) +; RV32I-NEXT: sb a6, 7(a2) +; RV32I-NEXT: srli a1, t0, 16 +; RV32I-NEXT: and a4, t0, a3 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a5, t0, 24 +; RV32I-NEXT: sb t0, 8(a2) +; RV32I-NEXT: sb a4, 9(a2) +; RV32I-NEXT: sb a1, 10(a2) +; RV32I-NEXT: sb a5, 11(a2) +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: and a3, a0, a3 +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: srli a4, a0, 24 ; RV32I-NEXT: sb a0, 12(a2) -; RV32I-NEXT: sb a5, 13(a2) -; RV32I-NEXT: sb t4, 14(a2) -; RV32I-NEXT: sb t5, 15(a2) +; RV32I-NEXT: sb a3, 13(a2) +; RV32I-NEXT: sb a1, 14(a2) +; RV32I-NEXT: sb a4, 15(a2) ; RV32I-NEXT: ret %src = load i128, ptr %src.ptr, align 1 %byteOff = load i128, ptr %byteOff.ptr, align 1 @@ -1681,76 +1667,74 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: shl_16bytes_wordOff: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd s0, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: lbu a4, 1(a0) -; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: lbu a6, 3(a0) -; RV64I-NEXT: lbu a7, 4(a0) -; RV64I-NEXT: lbu t0, 5(a0) -; RV64I-NEXT: lbu t1, 6(a0) -; RV64I-NEXT: lbu t2, 7(a0) -; RV64I-NEXT: lbu t3, 8(a0) -; RV64I-NEXT: lbu t4, 9(a0) -; RV64I-NEXT: lbu t5, 10(a0) -; RV64I-NEXT: lbu t6, 11(a0) +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 3(a0) +; RV64I-NEXT: lbu a5, 7(a0) +; RV64I-NEXT: lbu a6, 2(a0) +; RV64I-NEXT: lbu a7, 0(a0) +; RV64I-NEXT: lbu t0, 4(a0) +; RV64I-NEXT: lbu t1, 5(a0) +; RV64I-NEXT: lbu t2, 6(a0) +; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu 
a6, 13(a0) -; RV64I-NEXT: lbu s0, 14(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a4, a6 +; RV64I-NEXT: or a3, a3, a7 +; RV64I-NEXT: slli a4, a4, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or a5, a5, t2 +; RV64I-NEXT: or a6, t1, t0 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: lbu a7, 8(a0) +; RV64I-NEXT: lbu t0, 9(a0) +; RV64I-NEXT: lbu t1, 10(a0) +; RV64I-NEXT: lbu t2, 11(a0) +; RV64I-NEXT: lbu t3, 12(a0) +; RV64I-NEXT: lbu t4, 13(a0) +; RV64I-NEXT: lbu t5, 14(a0) ; RV64I-NEXT: lbu a0, 15(a0) +; RV64I-NEXT: or a4, a4, a3 +; RV64I-NEXT: or a3, a5, a6 ; RV64I-NEXT: slli t0, t0, 8 ; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: or t0, t2, t1 -; RV64I-NEXT: or t1, t4, t3 -; RV64I-NEXT: or t2, t6, t5 -; RV64I-NEXT: lbu t3, 0(a1) -; RV64I-NEXT: lbu t4, 1(a1) -; RV64I-NEXT: lbu t5, 2(a1) -; RV64I-NEXT: lbu t6, 3(a1) -; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, t0, a7 +; RV64I-NEXT: or a6, t2, t1 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli a0, a0, 8 ; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a0, a0, s0 -; RV64I-NEXT: or a6, t4, t3 -; RV64I-NEXT: lbu t3, 4(a1) -; RV64I-NEXT: lbu t4, 5(a1) -; RV64I-NEXT: lbu s0, 6(a1) +; RV64I-NEXT: or a0, a0, t5 +; RV64I-NEXT: or a7, t4, t3 +; RV64I-NEXT: slli a0, a0, 16 +; RV64I-NEXT: lbu t0, 0(a1) +; RV64I-NEXT: lbu t1, 1(a1) +; RV64I-NEXT: lbu t2, 2(a1) +; RV64I-NEXT: lbu t3, 3(a1) +; RV64I-NEXT: lbu t4, 4(a1) +; RV64I-NEXT: lbu t5, 5(a1) +; RV64I-NEXT: lbu t6, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: or t5, t6, t5 -; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or t3, t4, t3 +; RV64I-NEXT: or a6, a6, a5 +; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: slli t3, t3, 8 +; RV64I-NEXT: or a5, t1, t0 +; RV64I-NEXT: or a7, t3, t2 +; RV64I-NEXT: slli t5, t5, 8 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or a1, a1, s0 -; RV64I-NEXT: slli 
a4, a4, 16 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: slli t0, t0, 16 -; RV64I-NEXT: or a4, t0, a7 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: or a7, t2, t1 -; RV64I-NEXT: slli a0, a0, 16 -; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: slli t5, t5, 16 -; RV64I-NEXT: or a5, t5, a6 +; RV64I-NEXT: or t0, t5, t4 +; RV64I-NEXT: or a1, a1, t6 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: or a1, a1, t3 -; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: or a5, a7, a5 +; RV64I-NEXT: or a1, a1, t0 +; RV64I-NEXT: slli a7, a3, 32 ; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: or a4, a4, a3 +; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a1, a1, a5 ; RV64I-NEXT: slli a3, a1, 5 ; RV64I-NEXT: li a5, 64 -; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: bltu a3, a5, .LBB9_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: li a1, 0 @@ -1759,101 +1743,99 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV64I-NEXT: bnez a3, .LBB9_3 ; RV64I-NEXT: j .LBB9_4 ; RV64I-NEXT: .LBB9_2: +; RV64I-NEXT: neg a1, a3 +; RV64I-NEXT: srl a5, a4, a1 +; RV64I-NEXT: sll a6, a0, a3 ; RV64I-NEXT: sll a1, a4, a3 -; RV64I-NEXT: neg a5, a3 -; RV64I-NEXT: srl a4, a4, a5 -; RV64I-NEXT: sll a5, a0, a3 -; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: or a4, a5, a6 ; RV64I-NEXT: beqz a3, .LBB9_4 ; RV64I-NEXT: .LBB9_3: ; RV64I-NEXT: mv a0, a4 ; RV64I-NEXT: .LBB9_4: ; RV64I-NEXT: srli a3, a1, 32 -; RV64I-NEXT: srliw a4, a1, 16 -; RV64I-NEXT: lui a5, 16 +; RV64I-NEXT: lui a4, 16 +; RV64I-NEXT: srliw a5, a1, 16 +; RV64I-NEXT: addi a4, a4, -1 ; RV64I-NEXT: srliw a6, a1, 24 -; RV64I-NEXT: srli a7, a1, 48 -; RV64I-NEXT: srli t0, a1, 56 -; RV64I-NEXT: srli t1, a0, 32 -; RV64I-NEXT: srliw t2, a0, 16 -; RV64I-NEXT: srliw t3, a0, 24 -; RV64I-NEXT: srli t4, a0, 48 -; RV64I-NEXT: srli t5, a0, 56 -; RV64I-NEXT: addi a5, a5, -1 -; RV64I-NEXT: and t6, a1, a5 -; RV64I-NEXT: srli t6, t6, 8 +; RV64I-NEXT: and 
a7, a1, a4 +; RV64I-NEXT: srli a7, a7, 8 ; RV64I-NEXT: sb a1, 0(a2) -; RV64I-NEXT: sb t6, 1(a2) -; RV64I-NEXT: sb a4, 2(a2) +; RV64I-NEXT: sb a7, 1(a2) +; RV64I-NEXT: sb a5, 2(a2) ; RV64I-NEXT: sb a6, 3(a2) -; RV64I-NEXT: and a1, a3, a5 -; RV64I-NEXT: srli a1, a1, 8 +; RV64I-NEXT: and a5, a3, a4 +; RV64I-NEXT: srli a6, a1, 48 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srli a1, a1, 56 ; RV64I-NEXT: sb a3, 4(a2) -; RV64I-NEXT: sb a1, 5(a2) -; RV64I-NEXT: sb a7, 6(a2) -; RV64I-NEXT: sb t0, 7(a2) -; RV64I-NEXT: and a1, a0, a5 -; RV64I-NEXT: and a3, t1, a5 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: sb a5, 5(a2) +; RV64I-NEXT: sb a6, 6(a2) +; RV64I-NEXT: sb a1, 7(a2) +; RV64I-NEXT: srli a1, a0, 32 +; RV64I-NEXT: srliw a3, a0, 16 +; RV64I-NEXT: and a5, a0, a4 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srliw a6, a0, 24 ; RV64I-NEXT: sb a0, 8(a2) -; RV64I-NEXT: sb a1, 9(a2) -; RV64I-NEXT: sb t2, 10(a2) -; RV64I-NEXT: sb t3, 11(a2) -; RV64I-NEXT: sb t1, 12(a2) -; RV64I-NEXT: sb a3, 13(a2) -; RV64I-NEXT: sb t4, 14(a2) -; RV64I-NEXT: sb t5, 15(a2) -; RV64I-NEXT: ld s0, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: sb a5, 9(a2) +; RV64I-NEXT: sb a3, 10(a2) +; RV64I-NEXT: sb a6, 11(a2) +; RV64I-NEXT: srli a3, a0, 48 +; RV64I-NEXT: and a4, a1, a4 +; RV64I-NEXT: srli a4, a4, 8 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: sb a1, 12(a2) +; RV64I-NEXT: sb a4, 13(a2) +; RV64I-NEXT: sb a3, 14(a2) +; RV64I-NEXT: sb a0, 15(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: shl_16bytes_wordOff: ; RV32I: # %bb.0: -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: lbu a4, 1(a0) -; RV32I-NEXT: lbu a5, 2(a0) -; RV32I-NEXT: lbu a6, 3(a0) -; RV32I-NEXT: lbu a7, 4(a0) -; RV32I-NEXT: lbu t0, 5(a0) -; RV32I-NEXT: lbu t1, 6(a0) -; RV32I-NEXT: lbu t2, 7(a0) +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 3(a0) +; RV32I-NEXT: lbu a5, 0(a0) +; RV32I-NEXT: lbu a6, 2(a0) +; RV32I-NEXT: lbu t0, 4(a0) +; RV32I-NEXT: lbu t1, 5(a0) +; RV32I-NEXT: lbu t2, 
6(a0) +; RV32I-NEXT: lbu t3, 7(a0) +; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a5, t0, a7 -; RV32I-NEXT: or a7, t2, t1 +; RV32I-NEXT: or a3, a3, a5 +; RV32I-NEXT: or a4, a4, a6 +; RV32I-NEXT: slli a4, a4, 16 +; RV32I-NEXT: lbu a5, 0(a1) ; RV32I-NEXT: lbu a6, 1(a1) -; RV32I-NEXT: lbu t0, 0(a1) -; RV32I-NEXT: lbu t1, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or t2, a6, t0 -; RV32I-NEXT: li a6, 64 -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a1, t1 -; RV32I-NEXT: li t1, 32 +; RV32I-NEXT: lbu t4, 3(a1) +; RV32I-NEXT: lbu a1, 2(a1) +; RV32I-NEXT: or a7, a4, a3 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or a3, t1, t0 +; RV32I-NEXT: or a4, t3, t2 ; RV32I-NEXT: slli a4, a4, 16 -; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a1, t4, a1 +; RV32I-NEXT: or a5, a6, a5 ; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: or t0, a4, a3 -; RV32I-NEXT: or a4, a7, a5 -; RV32I-NEXT: or a5, a1, t2 -; RV32I-NEXT: slli a5, a5, 5 -; RV32I-NEXT: neg t3, a5 -; RV32I-NEXT: srl t4, t0, t3 -; RV32I-NEXT: sll t2, a4, a5 -; RV32I-NEXT: bltu a5, t1, .LBB9_2 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a1, a1, a5 +; RV32I-NEXT: slli a4, a1, 5 +; RV32I-NEXT: li a6, 64 +; RV32I-NEXT: li t2, 32 +; RV32I-NEXT: neg t3, a4 +; RV32I-NEXT: srl t4, a7, t3 +; RV32I-NEXT: sll t1, a3, a4 +; RV32I-NEXT: bltu a4, t2, .LBB9_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: sll t5, t0, a5 +; RV32I-NEXT: sll t5, a7, a4 ; RV32I-NEXT: j .LBB9_3 ; RV32I-NEXT: .LBB9_2: -; RV32I-NEXT: sll a1, t0, a5 -; RV32I-NEXT: or t5, t4, t2 +; RV32I-NEXT: sll a1, a7, a4 +; RV32I-NEXT: or t5, t4, t1 ; RV32I-NEXT: .LBB9_3: ; RV32I-NEXT: addi sp, sp, -32 ; RV32I-NEXT: sw s0, 28(sp) # 4-byte Folded Spill @@ 
-1862,85 +1844,84 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: sw s3, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: lbu s3, 11(a0) -; RV32I-NEXT: lbu s1, 15(a0) -; RV32I-NEXT: sub a7, a6, a5 -; RV32I-NEXT: mv a3, a4 -; RV32I-NEXT: beqz a5, .LBB9_5 +; RV32I-NEXT: lbu s2, 15(a0) +; RV32I-NEXT: sub t0, a6, a4 +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: beqz a4, .LBB9_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: mv a5, t5 ; RV32I-NEXT: .LBB9_5: -; RV32I-NEXT: lbu s2, 9(a0) +; RV32I-NEXT: lbu t6, 9(a0) ; RV32I-NEXT: lbu t5, 10(a0) -; RV32I-NEXT: lbu s0, 13(a0) -; RV32I-NEXT: lbu t6, 14(a0) +; RV32I-NEXT: lbu s1, 13(a0) +; RV32I-NEXT: lbu s0, 14(a0) ; RV32I-NEXT: slli s3, s3, 8 -; RV32I-NEXT: slli s1, s1, 8 -; RV32I-NEXT: bltu a7, t1, .LBB9_7 +; RV32I-NEXT: slli s2, s2, 8 +; RV32I-NEXT: bltu t0, t2, .LBB9_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: srl s4, a4, a7 +; RV32I-NEXT: srl s4, a3, t0 ; RV32I-NEXT: j .LBB9_8 ; RV32I-NEXT: .LBB9_7: -; RV32I-NEXT: neg s4, a7 -; RV32I-NEXT: sll s4, a4, s4 +; RV32I-NEXT: neg s4, t0 +; RV32I-NEXT: sll s4, a3, s4 ; RV32I-NEXT: or s4, t4, s4 ; RV32I-NEXT: .LBB9_8: -; RV32I-NEXT: slli s2, s2, 8 -; RV32I-NEXT: lbu s6, 8(a0) -; RV32I-NEXT: lbu s5, 12(a0) +; RV32I-NEXT: lbu s5, 8(a0) +; RV32I-NEXT: lbu a0, 12(a0) +; RV32I-NEXT: slli t6, t6, 8 ; RV32I-NEXT: or s3, s3, t5 -; RV32I-NEXT: slli t5, s0, 8 -; RV32I-NEXT: or s1, s1, t6 -; RV32I-NEXT: mv t4, t0 -; RV32I-NEXT: beqz a7, .LBB9_10 +; RV32I-NEXT: slli t5, s1, 8 +; RV32I-NEXT: or s1, s2, s0 +; RV32I-NEXT: mv t4, a7 +; RV32I-NEXT: beqz t0, .LBB9_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: mv t4, s4 ; RV32I-NEXT: .LBB9_10: -; RV32I-NEXT: or a0, s2, s6 +; RV32I-NEXT: or t6, t6, s5 ; RV32I-NEXT: slli s0, s3, 16 -; RV32I-NEXT: or t6, t5, s5 +; RV32I-NEXT: or a0, t5, a0 ; RV32I-NEXT: slli 
s1, s1, 16 -; RV32I-NEXT: bltu a7, t1, .LBB9_12 +; RV32I-NEXT: bltu t0, t2, .LBB9_12 ; RV32I-NEXT: # %bb.11: ; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: j .LBB9_13 ; RV32I-NEXT: .LBB9_12: -; RV32I-NEXT: srl t5, a4, t3 +; RV32I-NEXT: srl t5, a3, t3 ; RV32I-NEXT: .LBB9_13: -; RV32I-NEXT: or a7, s0, a0 -; RV32I-NEXT: or a0, s1, t6 -; RV32I-NEXT: bltu a5, t1, .LBB9_15 +; RV32I-NEXT: or t0, s0, t6 +; RV32I-NEXT: or a0, s1, a0 +; RV32I-NEXT: bltu a4, t2, .LBB9_15 ; RV32I-NEXT: # %bb.14: -; RV32I-NEXT: li t6, 0 -; RV32I-NEXT: sll s1, a7, a5 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: sll s1, t0, a4 ; RV32I-NEXT: j .LBB9_16 ; RV32I-NEXT: .LBB9_15: -; RV32I-NEXT: sll t6, a7, a5 -; RV32I-NEXT: srl t3, a7, t3 -; RV32I-NEXT: sll s0, a0, a5 -; RV32I-NEXT: or s1, t3, s0 +; RV32I-NEXT: srl t6, t0, t3 +; RV32I-NEXT: sll s0, a0, a4 +; RV32I-NEXT: sll t3, t0, a4 +; RV32I-NEXT: or s1, t6, s0 ; RV32I-NEXT: .LBB9_16: -; RV32I-NEXT: addi s0, a5, -64 -; RV32I-NEXT: mv t3, a0 -; RV32I-NEXT: beqz a5, .LBB9_18 +; RV32I-NEXT: addi s0, a4, -64 +; RV32I-NEXT: mv t6, a0 +; RV32I-NEXT: beqz a4, .LBB9_18 ; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: mv t6, s1 ; RV32I-NEXT: .LBB9_18: -; RV32I-NEXT: bltu s0, t1, .LBB9_20 +; RV32I-NEXT: bltu s0, t2, .LBB9_20 ; RV32I-NEXT: # %bb.19: -; RV32I-NEXT: li t1, 0 -; RV32I-NEXT: sll t0, t0, s0 +; RV32I-NEXT: li t2, 0 +; RV32I-NEXT: sll a7, a7, s0 ; RV32I-NEXT: bnez s0, .LBB9_21 ; RV32I-NEXT: j .LBB9_22 ; RV32I-NEXT: .LBB9_20: -; RV32I-NEXT: sll t1, t0, a5 -; RV32I-NEXT: neg s1, s0 -; RV32I-NEXT: srl t0, t0, s1 -; RV32I-NEXT: or t0, t0, t2 +; RV32I-NEXT: neg t2, s0 +; RV32I-NEXT: srl s1, a7, t2 +; RV32I-NEXT: sll t2, a7, a4 +; RV32I-NEXT: or a7, s1, t1 ; RV32I-NEXT: beqz s0, .LBB9_22 ; RV32I-NEXT: .LBB9_21: -; RV32I-NEXT: mv a4, t0 +; RV32I-NEXT: mv a3, a7 ; RV32I-NEXT: .LBB9_22: ; RV32I-NEXT: lw s0, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 24(sp) # 4-byte Folded Reload @@ -1948,56 +1929,55 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr 
%wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: lw s3, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s4, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s5, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 -; RV32I-NEXT: bltu a5, a6, .LBB9_24 +; RV32I-NEXT: bltu a4, a6, .LBB9_24 ; RV32I-NEXT: # %bb.23: ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez a5, .LBB9_25 +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: bnez a4, .LBB9_25 ; RV32I-NEXT: j .LBB9_26 ; RV32I-NEXT: .LBB9_24: -; RV32I-NEXT: or t1, t4, t6 -; RV32I-NEXT: or a4, t5, t3 -; RV32I-NEXT: beqz a5, .LBB9_26 +; RV32I-NEXT: or t2, t4, t3 +; RV32I-NEXT: or a3, t5, t6 +; RV32I-NEXT: beqz a4, .LBB9_26 ; RV32I-NEXT: .LBB9_25: -; RV32I-NEXT: mv a7, t1 -; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv t0, t2 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: .LBB9_26: +; RV32I-NEXT: lui a3, 16 +; RV32I-NEXT: addi a3, a3, -1 ; RV32I-NEXT: srli a4, a1, 16 -; RV32I-NEXT: lui a5, 16 -; RV32I-NEXT: srli a6, a1, 24 -; RV32I-NEXT: srli t0, a3, 16 -; RV32I-NEXT: srli t1, a3, 24 -; RV32I-NEXT: srli t2, a7, 16 -; RV32I-NEXT: srli t3, a7, 24 -; RV32I-NEXT: srli t4, a0, 16 -; RV32I-NEXT: srli t5, a0, 24 -; RV32I-NEXT: addi a5, a5, -1 -; RV32I-NEXT: and t6, a1, a5 -; RV32I-NEXT: srli t6, t6, 8 +; RV32I-NEXT: and a6, a1, a3 +; RV32I-NEXT: srli a7, a1, 24 +; RV32I-NEXT: srli a6, a6, 8 ; RV32I-NEXT: sb a1, 0(a2) -; RV32I-NEXT: sb t6, 1(a2) +; RV32I-NEXT: sb a6, 1(a2) ; RV32I-NEXT: sb a4, 2(a2) -; RV32I-NEXT: sb a6, 3(a2) -; RV32I-NEXT: and a1, a3, a5 -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a3, 4(a2) -; RV32I-NEXT: sb a1, 5(a2) -; RV32I-NEXT: sb t0, 6(a2) -; RV32I-NEXT: sb t1, 7(a2) -; RV32I-NEXT: and a1, a7, a5 -; RV32I-NEXT: and a5, a0, a5 -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a7, 8(a2) -; RV32I-NEXT: sb a1, 9(a2) -; RV32I-NEXT: sb t2, 10(a2) -; RV32I-NEXT: sb t3, 11(a2) +; RV32I-NEXT: sb a7, 3(a2) +; RV32I-NEXT: srli a1, a5, 16 +; 
RV32I-NEXT: and a4, a5, a3 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a6, a5, 24 +; RV32I-NEXT: sb a5, 4(a2) +; RV32I-NEXT: sb a4, 5(a2) +; RV32I-NEXT: sb a1, 6(a2) +; RV32I-NEXT: sb a6, 7(a2) +; RV32I-NEXT: srli a1, t0, 16 +; RV32I-NEXT: and a4, t0, a3 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a5, t0, 24 +; RV32I-NEXT: sb t0, 8(a2) +; RV32I-NEXT: sb a4, 9(a2) +; RV32I-NEXT: sb a1, 10(a2) +; RV32I-NEXT: sb a5, 11(a2) +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: and a3, a0, a3 +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: srli a4, a0, 24 ; RV32I-NEXT: sb a0, 12(a2) -; RV32I-NEXT: sb a5, 13(a2) -; RV32I-NEXT: sb t4, 14(a2) -; RV32I-NEXT: sb t5, 15(a2) +; RV32I-NEXT: sb a3, 13(a2) +; RV32I-NEXT: sb a1, 14(a2) +; RV32I-NEXT: sb a4, 15(a2) ; RV32I-NEXT: ret %src = load i128, ptr %src.ptr, align 1 %wordOff = load i128, ptr %wordOff.ptr, align 1 @@ -2011,76 +1991,74 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: ashr_16bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd s0, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: lbu a4, 1(a0) -; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: lbu a6, 3(a0) -; RV64I-NEXT: lbu a7, 4(a0) -; RV64I-NEXT: lbu t0, 5(a0) -; RV64I-NEXT: lbu t1, 6(a0) -; RV64I-NEXT: lbu t2, 7(a0) -; RV64I-NEXT: lbu t3, 8(a0) -; RV64I-NEXT: lbu t4, 9(a0) -; RV64I-NEXT: lbu t5, 10(a0) -; RV64I-NEXT: lbu t6, 11(a0) +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 3(a0) +; RV64I-NEXT: lbu a5, 7(a0) +; RV64I-NEXT: lbu a6, 2(a0) +; RV64I-NEXT: lbu a7, 0(a0) +; RV64I-NEXT: lbu t0, 4(a0) +; RV64I-NEXT: lbu t1, 5(a0) +; RV64I-NEXT: lbu t2, 6(a0) +; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu a6, 13(a0) -; RV64I-NEXT: lbu s0, 14(a0) 
+; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a4, a6 +; RV64I-NEXT: or a3, a3, a7 +; RV64I-NEXT: slli a4, a4, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or a5, a5, t2 +; RV64I-NEXT: or a6, t1, t0 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: lbu a7, 8(a0) +; RV64I-NEXT: lbu t0, 9(a0) +; RV64I-NEXT: lbu t1, 10(a0) +; RV64I-NEXT: lbu t2, 11(a0) +; RV64I-NEXT: lbu t3, 12(a0) +; RV64I-NEXT: lbu t4, 13(a0) +; RV64I-NEXT: lbu t5, 14(a0) ; RV64I-NEXT: lbu a0, 15(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: or a4, a5, a6 ; RV64I-NEXT: slli t0, t0, 8 ; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: or t0, t2, t1 -; RV64I-NEXT: or t1, t4, t3 -; RV64I-NEXT: or t2, t6, t5 -; RV64I-NEXT: lbu t3, 0(a1) -; RV64I-NEXT: lbu t4, 1(a1) -; RV64I-NEXT: lbu t5, 2(a1) -; RV64I-NEXT: lbu t6, 3(a1) -; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, t0, a7 +; RV64I-NEXT: or a6, t2, t1 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli a0, a0, 8 ; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a0, a0, s0 -; RV64I-NEXT: or a6, t4, t3 -; RV64I-NEXT: lbu t3, 4(a1) -; RV64I-NEXT: lbu t4, 5(a1) -; RV64I-NEXT: lbu s0, 6(a1) +; RV64I-NEXT: or a0, a0, t5 +; RV64I-NEXT: or a7, t4, t3 +; RV64I-NEXT: slli a0, a0, 16 +; RV64I-NEXT: lbu t0, 0(a1) +; RV64I-NEXT: lbu t1, 1(a1) +; RV64I-NEXT: lbu t2, 2(a1) +; RV64I-NEXT: lbu t3, 3(a1) +; RV64I-NEXT: lbu t4, 4(a1) +; RV64I-NEXT: lbu t5, 5(a1) +; RV64I-NEXT: lbu t6, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: or t5, t6, t5 -; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or t3, t4, t3 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: slli t3, t3, 8 +; RV64I-NEXT: or a6, t1, t0 +; RV64I-NEXT: or a7, t3, t2 +; RV64I-NEXT: slli t5, t5, 8 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or a1, a1, s0 -; RV64I-NEXT: slli a4, a4, 16 -; RV64I-NEXT: or a3, a4, a3 -; 
RV64I-NEXT: slli t0, t0, 16 -; RV64I-NEXT: or a4, t0, a7 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: or a7, t2, t1 -; RV64I-NEXT: slli a0, a0, 16 -; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: slli t5, t5, 16 -; RV64I-NEXT: or a5, t5, a6 +; RV64I-NEXT: or t0, t5, t4 +; RV64I-NEXT: or a1, a1, t6 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: or a1, a1, t3 -; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: slli a6, a0, 32 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a1, a1, t0 +; RV64I-NEXT: slli a7, a4, 32 ; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: or a0, a4, a3 -; RV64I-NEXT: or a1, a1, a5 +; RV64I-NEXT: slli t0, a0, 32 +; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: li a4, 64 -; RV64I-NEXT: or a3, a6, a7 +; RV64I-NEXT: or a0, a7, a3 +; RV64I-NEXT: or a3, t0, a5 ; RV64I-NEXT: bltu a1, a4, .LBB10_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: sub a5, a1, a4 @@ -2088,10 +2066,10 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: bnez a1, .LBB10_3 ; RV64I-NEXT: j .LBB10_4 ; RV64I-NEXT: .LBB10_2: -; RV64I-NEXT: srl a5, a0, a1 -; RV64I-NEXT: neg a6, a1 -; RV64I-NEXT: sll a6, a3, a6 -; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: neg a5, a1 +; RV64I-NEXT: srl a6, a0, a1 +; RV64I-NEXT: sll a5, a3, a5 +; RV64I-NEXT: or a5, a6, a5 ; RV64I-NEXT: beqz a1, .LBB10_4 ; RV64I-NEXT: .LBB10_3: ; RV64I-NEXT: mv a0, a5 @@ -2104,43 +2082,41 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: sra a1, a3, a1 ; RV64I-NEXT: .LBB10_7: ; RV64I-NEXT: srli a3, a0, 32 -; RV64I-NEXT: srliw a4, a0, 16 -; RV64I-NEXT: lui a5, 16 +; RV64I-NEXT: lui a4, 16 +; RV64I-NEXT: srliw a5, a0, 16 +; RV64I-NEXT: addi a4, a4, -1 ; RV64I-NEXT: srliw a6, a0, 24 -; RV64I-NEXT: srli a7, a0, 48 -; RV64I-NEXT: srli t0, a0, 56 -; RV64I-NEXT: srli t1, a1, 32 -; RV64I-NEXT: srliw t2, a1, 16 -; RV64I-NEXT: srliw t3, a1, 24 -; RV64I-NEXT: srli t4, a1, 48 -; RV64I-NEXT: srli t5, a1, 56 -; 
RV64I-NEXT: addi a5, a5, -1 -; RV64I-NEXT: and t6, a0, a5 -; RV64I-NEXT: srli t6, t6, 8 +; RV64I-NEXT: and a7, a0, a4 +; RV64I-NEXT: srli a7, a7, 8 ; RV64I-NEXT: sb a0, 0(a2) -; RV64I-NEXT: sb t6, 1(a2) -; RV64I-NEXT: sb a4, 2(a2) +; RV64I-NEXT: sb a7, 1(a2) +; RV64I-NEXT: sb a5, 2(a2) ; RV64I-NEXT: sb a6, 3(a2) -; RV64I-NEXT: and a0, a3, a5 -; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: and a5, a3, a4 +; RV64I-NEXT: srli a6, a0, 48 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: sb a3, 4(a2) -; RV64I-NEXT: sb a0, 5(a2) -; RV64I-NEXT: sb a7, 6(a2) -; RV64I-NEXT: sb t0, 7(a2) -; RV64I-NEXT: and a0, a1, a5 -; RV64I-NEXT: and a3, t1, a5 -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: sb a5, 5(a2) +; RV64I-NEXT: sb a6, 6(a2) +; RV64I-NEXT: sb a0, 7(a2) +; RV64I-NEXT: srli a0, a1, 32 +; RV64I-NEXT: srliw a3, a1, 16 +; RV64I-NEXT: and a5, a1, a4 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srliw a6, a1, 24 ; RV64I-NEXT: sb a1, 8(a2) -; RV64I-NEXT: sb a0, 9(a2) -; RV64I-NEXT: sb t2, 10(a2) -; RV64I-NEXT: sb t3, 11(a2) -; RV64I-NEXT: sb t1, 12(a2) -; RV64I-NEXT: sb a3, 13(a2) -; RV64I-NEXT: sb t4, 14(a2) -; RV64I-NEXT: sb t5, 15(a2) -; RV64I-NEXT: ld s0, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: sb a5, 9(a2) +; RV64I-NEXT: sb a3, 10(a2) +; RV64I-NEXT: sb a6, 11(a2) +; RV64I-NEXT: srli a3, a1, 48 +; RV64I-NEXT: and a4, a0, a4 +; RV64I-NEXT: srli a4, a4, 8 +; RV64I-NEXT: srli a1, a1, 56 +; RV64I-NEXT: sb a0, 12(a2) +; RV64I-NEXT: sb a4, 13(a2) +; RV64I-NEXT: sb a3, 14(a2) +; RV64I-NEXT: sb a1, 15(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: ashr_16bytes: @@ -2149,189 +2125,189 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: lbu a5, 1(a0) -; RV32I-NEXT: lbu a6, 2(a0) -; 
RV32I-NEXT: lbu a7, 3(a0) -; RV32I-NEXT: lbu a4, 4(a0) -; RV32I-NEXT: lbu t2, 5(a0) -; RV32I-NEXT: lbu t0, 6(a0) +; RV32I-NEXT: lbu a6, 0(a0) +; RV32I-NEXT: lbu a4, 1(a0) +; RV32I-NEXT: lbu a3, 2(a0) +; RV32I-NEXT: lbu a5, 3(a0) +; RV32I-NEXT: lbu a7, 4(a0) +; RV32I-NEXT: lbu t0, 5(a0) ; RV32I-NEXT: lbu t1, 7(a0) -; RV32I-NEXT: lbu t4, 8(a0) +; RV32I-NEXT: lbu t2, 6(a0) +; RV32I-NEXT: lbu t4, 11(a0) ; RV32I-NEXT: lbu t5, 9(a0) ; RV32I-NEXT: lbu t6, 10(a0) -; RV32I-NEXT: lbu s0, 11(a0) -; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: lbu s0, 8(a0) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or t3, a5, a3 ; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or t3, a7, a6 -; RV32I-NEXT: or t1, t1, t0 -; RV32I-NEXT: lbu a6, 12(a0) -; RV32I-NEXT: lbu a7, 13(a0) -; RV32I-NEXT: lbu t0, 14(a0) -; RV32I-NEXT: lbu a0, 15(a0) +; RV32I-NEXT: or t1, t1, t2 +; RV32I-NEXT: slli t4, t4, 8 ; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or t4, t5, t4 -; RV32I-NEXT: or t5, s0, t6 -; RV32I-NEXT: or a6, a7, a6 -; RV32I-NEXT: lbu a7, 0(a1) -; RV32I-NEXT: lbu t6, 1(a1) -; RV32I-NEXT: lbu s0, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: or a3, t4, t6 +; RV32I-NEXT: or a5, t5, s0 +; RV32I-NEXT: slli a3, a3, 16 +; RV32I-NEXT: lbu t2, 12(a0) +; RV32I-NEXT: lbu t4, 13(a0) +; RV32I-NEXT: lbu t5, 14(a0) +; RV32I-NEXT: lbu a0, 15(a0) +; RV32I-NEXT: lbu t6, 0(a1) +; RV32I-NEXT: lbu s0, 1(a1) +; RV32I-NEXT: lbu s1, 2(a1) +; RV32I-NEXT: lbu s2, 3(a1) +; RV32I-NEXT: or a3, a3, a5 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: or a5, t4, t2 ; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or s1, a0, t0 -; RV32I-NEXT: slli t6, t6, 8 -; RV32I-NEXT: or t6, t6, a7 -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or s0, a1, s0 +; RV32I-NEXT: or t2, a0, t5 +; RV32I-NEXT: slli a1, a4, 8 +; RV32I-NEXT: slli a0, t0, 8 +; RV32I-NEXT: slli s2, s2, 8 +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: or a4, s2, s1 +; RV32I-NEXT: or t0, s0, t6 +; RV32I-NEXT: slli a4, a4, 16 +; 
RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: or t0, a4, t0 +; RV32I-NEXT: or a4, t2, a5 +; RV32I-NEXT: slli a5, t0, 3 ; RV32I-NEXT: li t0, 32 -; RV32I-NEXT: slli a1, a5, 8 -; RV32I-NEXT: slli a0, t2, 8 -; RV32I-NEXT: slli t5, t5, 16 -; RV32I-NEXT: slli s1, s1, 16 -; RV32I-NEXT: slli s0, s0, 16 -; RV32I-NEXT: or a7, t5, t4 -; RV32I-NEXT: or a5, s1, a6 -; RV32I-NEXT: or a6, s0, t6 -; RV32I-NEXT: slli a6, a6, 3 -; RV32I-NEXT: srl t2, a7, a6 -; RV32I-NEXT: neg t6, a6 -; RV32I-NEXT: sll t4, a5, t6 -; RV32I-NEXT: bltu a6, t0, .LBB10_2 +; RV32I-NEXT: neg t4, a5 +; RV32I-NEXT: srl t2, a3, a5 +; RV32I-NEXT: sll t5, a4, t4 +; RV32I-NEXT: bltu a5, t0, .LBB10_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sra t5, a5, a6 +; RV32I-NEXT: sra t6, a4, a5 ; RV32I-NEXT: j .LBB10_3 ; RV32I-NEXT: .LBB10_2: -; RV32I-NEXT: or t5, t2, t4 +; RV32I-NEXT: or t6, t2, t5 ; RV32I-NEXT: .LBB10_3: -; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a1, a1, a6 ; RV32I-NEXT: slli t3, t3, 16 -; RV32I-NEXT: or a3, a0, a4 +; RV32I-NEXT: or a6, a0, a7 ; RV32I-NEXT: slli t1, t1, 16 -; RV32I-NEXT: mv a0, a7 -; RV32I-NEXT: beqz a6, .LBB10_5 +; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: beqz a5, .LBB10_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: mv a0, t5 +; RV32I-NEXT: mv a0, t6 ; RV32I-NEXT: .LBB10_5: -; RV32I-NEXT: or a4, t3, a1 -; RV32I-NEXT: or a3, t1, a3 -; RV32I-NEXT: bltu a6, t0, .LBB10_7 +; RV32I-NEXT: or a7, t3, a1 +; RV32I-NEXT: or a6, t1, a6 +; RV32I-NEXT: bltu a5, t0, .LBB10_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: srai a1, a5, 31 -; RV32I-NEXT: srl t5, a3, a6 +; RV32I-NEXT: srai a1, a4, 31 +; RV32I-NEXT: srl t6, a6, a5 ; RV32I-NEXT: j .LBB10_8 ; RV32I-NEXT: .LBB10_7: -; RV32I-NEXT: sra a1, a5, a6 -; RV32I-NEXT: srl t1, a4, a6 -; RV32I-NEXT: sll t3, a3, t6 -; RV32I-NEXT: or t5, t1, t3 +; RV32I-NEXT: srl t1, a7, a5 +; RV32I-NEXT: sll t3, a6, t4 +; RV32I-NEXT: sra a1, a4, a5 +; RV32I-NEXT: or t6, t1, t3 ; RV32I-NEXT: .LBB10_8: ; RV32I-NEXT: li t1, 64 -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: beqz a6, .LBB10_10 +; 
RV32I-NEXT: mv t3, a7 +; RV32I-NEXT: beqz a5, .LBB10_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv t3, t5 +; RV32I-NEXT: mv t3, t6 ; RV32I-NEXT: .LBB10_10: -; RV32I-NEXT: sub s0, t1, a6 -; RV32I-NEXT: bltu a6, t0, .LBB10_13 +; RV32I-NEXT: sub s0, t1, a5 +; RV32I-NEXT: bltu a5, t0, .LBB10_13 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: li t6, 0 ; RV32I-NEXT: bgeu s0, t0, .LBB10_14 ; RV32I-NEXT: .LBB10_12: -; RV32I-NEXT: sll t6, a7, t6 ; RV32I-NEXT: neg s1, s0 -; RV32I-NEXT: srl s1, a7, s1 -; RV32I-NEXT: or s2, s1, t4 +; RV32I-NEXT: srl s1, a3, s1 +; RV32I-NEXT: sll t4, a3, t4 +; RV32I-NEXT: or s2, s1, t5 ; RV32I-NEXT: j .LBB10_15 ; RV32I-NEXT: .LBB10_13: -; RV32I-NEXT: srl t5, a3, a6 +; RV32I-NEXT: srl t6, a6, a5 ; RV32I-NEXT: bltu s0, t0, .LBB10_12 ; RV32I-NEXT: .LBB10_14: -; RV32I-NEXT: li t6, 0 -; RV32I-NEXT: sll s2, a7, s0 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: sll s2, a3, s0 ; RV32I-NEXT: .LBB10_15: -; RV32I-NEXT: addi s1, a6, -64 -; RV32I-NEXT: mv t4, a5 +; RV32I-NEXT: addi s1, a5, -64 +; RV32I-NEXT: mv t5, a4 ; RV32I-NEXT: beqz s0, .LBB10_17 ; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: mv t5, s2 ; RV32I-NEXT: .LBB10_17: ; RV32I-NEXT: bltu s1, t0, .LBB10_19 ; RV32I-NEXT: # %bb.18: -; RV32I-NEXT: sra t2, a5, s1 +; RV32I-NEXT: sra t2, a4, s1 ; RV32I-NEXT: bnez s1, .LBB10_20 ; RV32I-NEXT: j .LBB10_21 ; RV32I-NEXT: .LBB10_19: ; RV32I-NEXT: neg s0, s1 -; RV32I-NEXT: sll s0, a5, s0 +; RV32I-NEXT: sll s0, a4, s0 ; RV32I-NEXT: or t2, t2, s0 ; RV32I-NEXT: beqz s1, .LBB10_21 ; RV32I-NEXT: .LBB10_20: -; RV32I-NEXT: mv a7, t2 +; RV32I-NEXT: mv a3, t2 ; RV32I-NEXT: .LBB10_21: ; RV32I-NEXT: bltu s1, t0, .LBB10_23 ; RV32I-NEXT: # %bb.22: -; RV32I-NEXT: srai t0, a5, 31 -; RV32I-NEXT: bltu a6, t1, .LBB10_24 +; RV32I-NEXT: srai t0, a4, 31 +; RV32I-NEXT: bltu a5, t1, .LBB10_24 ; RV32I-NEXT: j .LBB10_25 ; RV32I-NEXT: .LBB10_23: -; RV32I-NEXT: sra t0, a5, a6 -; RV32I-NEXT: bgeu a6, t1, .LBB10_25 +; RV32I-NEXT: sra t0, a4, a5 +; RV32I-NEXT: bgeu 
a5, t1, .LBB10_25 ; RV32I-NEXT: .LBB10_24: -; RV32I-NEXT: or a7, t3, t6 -; RV32I-NEXT: or t0, t5, t4 +; RV32I-NEXT: or a3, t3, t4 +; RV32I-NEXT: or t0, t6, t5 ; RV32I-NEXT: .LBB10_25: -; RV32I-NEXT: bnez a6, .LBB10_29 +; RV32I-NEXT: bnez a5, .LBB10_29 ; RV32I-NEXT: # %bb.26: -; RV32I-NEXT: bltu a6, t1, .LBB10_28 +; RV32I-NEXT: bltu a5, t1, .LBB10_28 ; RV32I-NEXT: .LBB10_27: -; RV32I-NEXT: srai a0, a5, 31 +; RV32I-NEXT: srai a0, a4, 31 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: .LBB10_28: -; RV32I-NEXT: srli a5, a4, 16 -; RV32I-NEXT: lui a6, 16 -; RV32I-NEXT: srli a7, a4, 24 -; RV32I-NEXT: srli t0, a3, 16 -; RV32I-NEXT: srli t1, a3, 24 -; RV32I-NEXT: srli t2, a0, 16 -; RV32I-NEXT: srli t3, a0, 24 -; RV32I-NEXT: srli t4, a1, 16 -; RV32I-NEXT: srli t5, a1, 24 -; RV32I-NEXT: addi a6, a6, -1 -; RV32I-NEXT: and t6, a4, a6 -; RV32I-NEXT: srli t6, t6, 8 -; RV32I-NEXT: sb a4, 0(a2) -; RV32I-NEXT: sb t6, 1(a2) -; RV32I-NEXT: sb a5, 2(a2) -; RV32I-NEXT: sb a7, 3(a2) -; RV32I-NEXT: and a4, a3, a6 -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a3, 4(a2) -; RV32I-NEXT: sb a4, 5(a2) -; RV32I-NEXT: sb t0, 6(a2) -; RV32I-NEXT: sb t1, 7(a2) -; RV32I-NEXT: and a3, a0, a6 -; RV32I-NEXT: and a4, a1, a6 -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: lui a3, 16 +; RV32I-NEXT: addi a3, a3, -1 +; RV32I-NEXT: srli a4, a7, 16 +; RV32I-NEXT: and a5, a7, a3 +; RV32I-NEXT: srli t0, a7, 24 +; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: sb a7, 0(a2) +; RV32I-NEXT: sb a5, 1(a2) +; RV32I-NEXT: sb a4, 2(a2) +; RV32I-NEXT: sb t0, 3(a2) +; RV32I-NEXT: srli a4, a6, 16 +; RV32I-NEXT: and a5, a6, a3 +; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: srli a7, a6, 24 +; RV32I-NEXT: sb a6, 4(a2) +; RV32I-NEXT: sb a5, 5(a2) +; RV32I-NEXT: sb a4, 6(a2) +; RV32I-NEXT: sb a7, 7(a2) +; RV32I-NEXT: srli a4, a0, 16 +; RV32I-NEXT: and a5, a0, a3 +; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: srli a6, a0, 24 ; RV32I-NEXT: sb a0, 8(a2) -; RV32I-NEXT: sb a3, 9(a2) -; RV32I-NEXT: sb t2, 10(a2) -; 
RV32I-NEXT: sb t3, 11(a2) +; RV32I-NEXT: sb a5, 9(a2) +; RV32I-NEXT: sb a4, 10(a2) +; RV32I-NEXT: sb a6, 11(a2) +; RV32I-NEXT: srli a0, a1, 16 +; RV32I-NEXT: and a3, a1, a3 +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: srli a4, a1, 24 ; RV32I-NEXT: sb a1, 12(a2) -; RV32I-NEXT: sb a4, 13(a2) -; RV32I-NEXT: sb t4, 14(a2) -; RV32I-NEXT: sb t5, 15(a2) +; RV32I-NEXT: sb a3, 13(a2) +; RV32I-NEXT: sb a0, 14(a2) +; RV32I-NEXT: sb a4, 15(a2) ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB10_29: -; RV32I-NEXT: mv a4, a7 -; RV32I-NEXT: mv a3, t0 -; RV32I-NEXT: bgeu a6, t1, .LBB10_27 +; RV32I-NEXT: mv a7, a3 +; RV32I-NEXT: mv a6, t0 +; RV32I-NEXT: bgeu a5, t1, .LBB10_27 ; RV32I-NEXT: j .LBB10_28 %src = load i128, ptr %src.ptr, align 1 %byteOff = load i128, ptr %byteOff.ptr, align 1 @@ -2344,76 +2320,74 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: ashr_16bytes_wordOff: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd s0, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: lbu a4, 1(a0) -; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: lbu a6, 3(a0) -; RV64I-NEXT: lbu a7, 4(a0) -; RV64I-NEXT: lbu t0, 5(a0) -; RV64I-NEXT: lbu t1, 6(a0) -; RV64I-NEXT: lbu t2, 7(a0) -; RV64I-NEXT: lbu t3, 8(a0) -; RV64I-NEXT: lbu t4, 9(a0) -; RV64I-NEXT: lbu t5, 10(a0) -; RV64I-NEXT: lbu t6, 11(a0) +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 3(a0) +; RV64I-NEXT: lbu a5, 7(a0) +; RV64I-NEXT: lbu a6, 2(a0) +; RV64I-NEXT: lbu a7, 0(a0) +; RV64I-NEXT: lbu t0, 4(a0) +; RV64I-NEXT: lbu t1, 5(a0) +; RV64I-NEXT: lbu t2, 6(a0) +; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: or a4, 
a6, a5 -; RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu a6, 13(a0) -; RV64I-NEXT: lbu s0, 14(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a4, a6 +; RV64I-NEXT: or a3, a3, a7 +; RV64I-NEXT: slli a4, a4, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or a5, a5, t2 +; RV64I-NEXT: or a6, t1, t0 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: lbu a7, 8(a0) +; RV64I-NEXT: lbu t0, 9(a0) +; RV64I-NEXT: lbu t1, 10(a0) +; RV64I-NEXT: lbu t2, 11(a0) +; RV64I-NEXT: lbu t3, 12(a0) +; RV64I-NEXT: lbu t4, 13(a0) +; RV64I-NEXT: lbu t5, 14(a0) ; RV64I-NEXT: lbu a0, 15(a0) +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: or a4, a5, a6 ; RV64I-NEXT: slli t0, t0, 8 ; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: or t0, t2, t1 -; RV64I-NEXT: or t1, t4, t3 -; RV64I-NEXT: or t2, t6, t5 -; RV64I-NEXT: lbu t3, 0(a1) -; RV64I-NEXT: lbu t4, 1(a1) -; RV64I-NEXT: lbu t5, 2(a1) -; RV64I-NEXT: lbu t6, 3(a1) -; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a5, t0, a7 +; RV64I-NEXT: or a6, t2, t1 +; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli a0, a0, 8 ; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a0, a0, s0 -; RV64I-NEXT: or a6, t4, t3 -; RV64I-NEXT: lbu t3, 4(a1) -; RV64I-NEXT: lbu t4, 5(a1) -; RV64I-NEXT: lbu s0, 6(a1) +; RV64I-NEXT: or a0, a0, t5 +; RV64I-NEXT: or a7, t4, t3 +; RV64I-NEXT: slli a0, a0, 16 +; RV64I-NEXT: lbu t0, 0(a1) +; RV64I-NEXT: lbu t1, 1(a1) +; RV64I-NEXT: lbu t2, 2(a1) +; RV64I-NEXT: lbu t3, 3(a1) +; RV64I-NEXT: lbu t4, 4(a1) +; RV64I-NEXT: lbu t5, 5(a1) +; RV64I-NEXT: lbu t6, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: or t5, t6, t5 -; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or t3, t4, t3 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: slli t3, t3, 8 +; RV64I-NEXT: or a6, t1, t0 +; RV64I-NEXT: or a7, t3, t2 +; RV64I-NEXT: slli t5, t5, 8 ; RV64I-NEXT: slli a1, 
a1, 8 -; RV64I-NEXT: or a1, a1, s0 -; RV64I-NEXT: slli a4, a4, 16 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: slli t0, t0, 16 -; RV64I-NEXT: or a4, t0, a7 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: or a7, t2, t1 -; RV64I-NEXT: slli a0, a0, 16 -; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: slli t5, t5, 16 -; RV64I-NEXT: or a5, t5, a6 +; RV64I-NEXT: or t0, t5, t4 +; RV64I-NEXT: or a1, a1, t6 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: or a1, a1, t3 -; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: slli a6, a0, 32 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a1, a1, t0 +; RV64I-NEXT: slli a7, a4, 32 ; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: or a0, a4, a3 -; RV64I-NEXT: or a1, a1, a5 +; RV64I-NEXT: slli t0, a0, 32 +; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: slli a1, a1, 5 ; RV64I-NEXT: li a4, 64 -; RV64I-NEXT: or a3, a6, a7 +; RV64I-NEXT: or a0, a7, a3 +; RV64I-NEXT: or a3, t0, a5 ; RV64I-NEXT: bltu a1, a4, .LBB11_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: sub a5, a1, a4 @@ -2421,10 +2395,10 @@ define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: bnez a1, .LBB11_3 ; RV64I-NEXT: j .LBB11_4 ; RV64I-NEXT: .LBB11_2: -; RV64I-NEXT: srl a5, a0, a1 -; RV64I-NEXT: neg a6, a1 -; RV64I-NEXT: sll a6, a3, a6 -; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: neg a5, a1 +; RV64I-NEXT: srl a6, a0, a1 +; RV64I-NEXT: sll a5, a3, a5 +; RV64I-NEXT: or a5, a6, a5 ; RV64I-NEXT: beqz a1, .LBB11_4 ; RV64I-NEXT: .LBB11_3: ; RV64I-NEXT: mv a0, a5 @@ -2437,43 +2411,41 @@ define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: sra a1, a3, a1 ; RV64I-NEXT: .LBB11_7: ; RV64I-NEXT: srli a3, a0, 32 -; RV64I-NEXT: srliw a4, a0, 16 -; RV64I-NEXT: lui a5, 16 +; RV64I-NEXT: lui a4, 16 +; RV64I-NEXT: srliw a5, a0, 16 +; RV64I-NEXT: addi a4, a4, -1 ; RV64I-NEXT: srliw a6, a0, 24 -; RV64I-NEXT: srli a7, a0, 48 -; RV64I-NEXT: srli t0, a0, 56 -; RV64I-NEXT: srli t1, a1, 32 -; RV64I-NEXT: srliw t2, a1, 16 -; 
RV64I-NEXT: srliw t3, a1, 24 -; RV64I-NEXT: srli t4, a1, 48 -; RV64I-NEXT: srli t5, a1, 56 -; RV64I-NEXT: addi a5, a5, -1 -; RV64I-NEXT: and t6, a0, a5 -; RV64I-NEXT: srli t6, t6, 8 +; RV64I-NEXT: and a7, a0, a4 +; RV64I-NEXT: srli a7, a7, 8 ; RV64I-NEXT: sb a0, 0(a2) -; RV64I-NEXT: sb t6, 1(a2) -; RV64I-NEXT: sb a4, 2(a2) +; RV64I-NEXT: sb a7, 1(a2) +; RV64I-NEXT: sb a5, 2(a2) ; RV64I-NEXT: sb a6, 3(a2) -; RV64I-NEXT: and a0, a3, a5 -; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: and a5, a3, a4 +; RV64I-NEXT: srli a6, a0, 48 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: sb a3, 4(a2) -; RV64I-NEXT: sb a0, 5(a2) -; RV64I-NEXT: sb a7, 6(a2) -; RV64I-NEXT: sb t0, 7(a2) -; RV64I-NEXT: and a0, a1, a5 -; RV64I-NEXT: and a3, t1, a5 -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: sb a5, 5(a2) +; RV64I-NEXT: sb a6, 6(a2) +; RV64I-NEXT: sb a0, 7(a2) +; RV64I-NEXT: srli a0, a1, 32 +; RV64I-NEXT: srliw a3, a1, 16 +; RV64I-NEXT: and a5, a1, a4 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srliw a6, a1, 24 ; RV64I-NEXT: sb a1, 8(a2) -; RV64I-NEXT: sb a0, 9(a2) -; RV64I-NEXT: sb t2, 10(a2) -; RV64I-NEXT: sb t3, 11(a2) -; RV64I-NEXT: sb t1, 12(a2) -; RV64I-NEXT: sb a3, 13(a2) -; RV64I-NEXT: sb t4, 14(a2) -; RV64I-NEXT: sb t5, 15(a2) -; RV64I-NEXT: ld s0, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: sb a5, 9(a2) +; RV64I-NEXT: sb a3, 10(a2) +; RV64I-NEXT: sb a6, 11(a2) +; RV64I-NEXT: srli a3, a1, 48 +; RV64I-NEXT: and a4, a0, a4 +; RV64I-NEXT: srli a4, a4, 8 +; RV64I-NEXT: srli a1, a1, 56 +; RV64I-NEXT: sb a0, 12(a2) +; RV64I-NEXT: sb a4, 13(a2) +; RV64I-NEXT: sb a3, 14(a2) +; RV64I-NEXT: sb a1, 15(a2) ; RV64I-NEXT: ret ; ; RV32I-LABEL: ashr_16bytes_wordOff: @@ -2482,189 +2454,189 @@ define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 4(sp) # 4-byte 
Folded Spill -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: lbu a5, 1(a0) -; RV32I-NEXT: lbu a6, 2(a0) -; RV32I-NEXT: lbu a7, 3(a0) -; RV32I-NEXT: lbu a4, 4(a0) -; RV32I-NEXT: lbu t2, 5(a0) -; RV32I-NEXT: lbu t0, 6(a0) +; RV32I-NEXT: lbu a6, 0(a0) +; RV32I-NEXT: lbu a4, 1(a0) +; RV32I-NEXT: lbu a3, 2(a0) +; RV32I-NEXT: lbu a5, 3(a0) +; RV32I-NEXT: lbu a7, 4(a0) +; RV32I-NEXT: lbu t0, 5(a0) ; RV32I-NEXT: lbu t1, 7(a0) -; RV32I-NEXT: lbu t4, 8(a0) +; RV32I-NEXT: lbu t2, 6(a0) +; RV32I-NEXT: lbu t4, 11(a0) ; RV32I-NEXT: lbu t5, 9(a0) ; RV32I-NEXT: lbu t6, 10(a0) -; RV32I-NEXT: lbu s0, 11(a0) -; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: lbu s0, 8(a0) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or t3, a5, a3 ; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or t3, a7, a6 -; RV32I-NEXT: or t1, t1, t0 -; RV32I-NEXT: lbu a6, 12(a0) -; RV32I-NEXT: lbu a7, 13(a0) -; RV32I-NEXT: lbu t0, 14(a0) -; RV32I-NEXT: lbu a0, 15(a0) +; RV32I-NEXT: or t1, t1, t2 +; RV32I-NEXT: slli t4, t4, 8 ; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or t4, t5, t4 -; RV32I-NEXT: or t5, s0, t6 -; RV32I-NEXT: or a6, a7, a6 -; RV32I-NEXT: lbu a7, 0(a1) -; RV32I-NEXT: lbu t6, 1(a1) -; RV32I-NEXT: lbu s0, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: or a3, t4, t6 +; RV32I-NEXT: or a5, t5, s0 +; RV32I-NEXT: slli a3, a3, 16 +; RV32I-NEXT: lbu t2, 12(a0) +; RV32I-NEXT: lbu t4, 13(a0) +; RV32I-NEXT: lbu t5, 14(a0) +; RV32I-NEXT: lbu a0, 15(a0) +; RV32I-NEXT: lbu t6, 0(a1) +; RV32I-NEXT: lbu s0, 1(a1) +; RV32I-NEXT: lbu s1, 2(a1) +; RV32I-NEXT: lbu s2, 3(a1) +; RV32I-NEXT: or a3, a3, a5 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: or a5, t4, t2 ; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or s1, a0, t0 -; RV32I-NEXT: slli t6, t6, 8 -; RV32I-NEXT: or t6, t6, a7 -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or s0, a1, s0 +; RV32I-NEXT: or t2, a0, t5 +; RV32I-NEXT: slli a1, a4, 8 +; RV32I-NEXT: slli a0, t0, 8 +; RV32I-NEXT: slli s2, s2, 8 +; RV32I-NEXT: slli 
s0, s0, 8 +; RV32I-NEXT: or a4, s2, s1 +; RV32I-NEXT: or t0, s0, t6 +; RV32I-NEXT: slli a4, a4, 16 +; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: or t0, a4, t0 +; RV32I-NEXT: or a4, t2, a5 +; RV32I-NEXT: slli a5, t0, 5 ; RV32I-NEXT: li t0, 32 -; RV32I-NEXT: slli a1, a5, 8 -; RV32I-NEXT: slli a0, t2, 8 -; RV32I-NEXT: slli t5, t5, 16 -; RV32I-NEXT: slli s1, s1, 16 -; RV32I-NEXT: slli s0, s0, 16 -; RV32I-NEXT: or a7, t5, t4 -; RV32I-NEXT: or a5, s1, a6 -; RV32I-NEXT: or a6, s0, t6 -; RV32I-NEXT: slli a6, a6, 5 -; RV32I-NEXT: srl t2, a7, a6 -; RV32I-NEXT: neg t6, a6 -; RV32I-NEXT: sll t4, a5, t6 -; RV32I-NEXT: bltu a6, t0, .LBB11_2 +; RV32I-NEXT: neg t4, a5 +; RV32I-NEXT: srl t2, a3, a5 +; RV32I-NEXT: sll t5, a4, t4 +; RV32I-NEXT: bltu a5, t0, .LBB11_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sra t5, a5, a6 +; RV32I-NEXT: sra t6, a4, a5 ; RV32I-NEXT: j .LBB11_3 ; RV32I-NEXT: .LBB11_2: -; RV32I-NEXT: or t5, t2, t4 +; RV32I-NEXT: or t6, t2, t5 ; RV32I-NEXT: .LBB11_3: -; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a1, a1, a6 ; RV32I-NEXT: slli t3, t3, 16 -; RV32I-NEXT: or a3, a0, a4 +; RV32I-NEXT: or a6, a0, a7 ; RV32I-NEXT: slli t1, t1, 16 -; RV32I-NEXT: mv a0, a7 -; RV32I-NEXT: beqz a6, .LBB11_5 +; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: beqz a5, .LBB11_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: mv a0, t5 +; RV32I-NEXT: mv a0, t6 ; RV32I-NEXT: .LBB11_5: -; RV32I-NEXT: or a4, t3, a1 -; RV32I-NEXT: or a3, t1, a3 -; RV32I-NEXT: bltu a6, t0, .LBB11_7 +; RV32I-NEXT: or a7, t3, a1 +; RV32I-NEXT: or a6, t1, a6 +; RV32I-NEXT: bltu a5, t0, .LBB11_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: srai a1, a5, 31 -; RV32I-NEXT: srl t5, a3, a6 +; RV32I-NEXT: srai a1, a4, 31 +; RV32I-NEXT: srl t6, a6, a5 ; RV32I-NEXT: j .LBB11_8 ; RV32I-NEXT: .LBB11_7: -; RV32I-NEXT: sra a1, a5, a6 -; RV32I-NEXT: srl t1, a4, a6 -; RV32I-NEXT: sll t3, a3, t6 -; RV32I-NEXT: or t5, t1, t3 +; RV32I-NEXT: srl t1, a7, a5 +; RV32I-NEXT: sll t3, a6, t4 +; RV32I-NEXT: sra a1, a4, a5 +; RV32I-NEXT: or t6, t1, t3 ; RV32I-NEXT: 
.LBB11_8: ; RV32I-NEXT: li t1, 64 -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: beqz a6, .LBB11_10 +; RV32I-NEXT: mv t3, a7 +; RV32I-NEXT: beqz a5, .LBB11_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv t3, t5 +; RV32I-NEXT: mv t3, t6 ; RV32I-NEXT: .LBB11_10: -; RV32I-NEXT: sub s0, t1, a6 -; RV32I-NEXT: bltu a6, t0, .LBB11_13 +; RV32I-NEXT: sub s0, t1, a5 +; RV32I-NEXT: bltu a5, t0, .LBB11_13 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: li t6, 0 ; RV32I-NEXT: bgeu s0, t0, .LBB11_14 ; RV32I-NEXT: .LBB11_12: -; RV32I-NEXT: sll t6, a7, t6 ; RV32I-NEXT: neg s1, s0 -; RV32I-NEXT: srl s1, a7, s1 -; RV32I-NEXT: or s2, s1, t4 +; RV32I-NEXT: srl s1, a3, s1 +; RV32I-NEXT: sll t4, a3, t4 +; RV32I-NEXT: or s2, s1, t5 ; RV32I-NEXT: j .LBB11_15 ; RV32I-NEXT: .LBB11_13: -; RV32I-NEXT: srl t5, a3, a6 +; RV32I-NEXT: srl t6, a6, a5 ; RV32I-NEXT: bltu s0, t0, .LBB11_12 ; RV32I-NEXT: .LBB11_14: -; RV32I-NEXT: li t6, 0 -; RV32I-NEXT: sll s2, a7, s0 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: sll s2, a3, s0 ; RV32I-NEXT: .LBB11_15: -; RV32I-NEXT: addi s1, a6, -64 -; RV32I-NEXT: mv t4, a5 +; RV32I-NEXT: addi s1, a5, -64 +; RV32I-NEXT: mv t5, a4 ; RV32I-NEXT: beqz s0, .LBB11_17 ; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: mv t5, s2 ; RV32I-NEXT: .LBB11_17: ; RV32I-NEXT: bltu s1, t0, .LBB11_19 ; RV32I-NEXT: # %bb.18: -; RV32I-NEXT: sra t2, a5, s1 +; RV32I-NEXT: sra t2, a4, s1 ; RV32I-NEXT: bnez s1, .LBB11_20 ; RV32I-NEXT: j .LBB11_21 ; RV32I-NEXT: .LBB11_19: ; RV32I-NEXT: neg s0, s1 -; RV32I-NEXT: sll s0, a5, s0 +; RV32I-NEXT: sll s0, a4, s0 ; RV32I-NEXT: or t2, t2, s0 ; RV32I-NEXT: beqz s1, .LBB11_21 ; RV32I-NEXT: .LBB11_20: -; RV32I-NEXT: mv a7, t2 +; RV32I-NEXT: mv a3, t2 ; RV32I-NEXT: .LBB11_21: ; RV32I-NEXT: bltu s1, t0, .LBB11_23 ; RV32I-NEXT: # %bb.22: -; RV32I-NEXT: srai t0, a5, 31 -; RV32I-NEXT: bltu a6, t1, .LBB11_24 +; RV32I-NEXT: srai t0, a4, 31 +; RV32I-NEXT: bltu a5, t1, .LBB11_24 ; RV32I-NEXT: j .LBB11_25 ; RV32I-NEXT: .LBB11_23: -; RV32I-NEXT: sra t0, 
a5, a6 -; RV32I-NEXT: bgeu a6, t1, .LBB11_25 +; RV32I-NEXT: sra t0, a4, a5 +; RV32I-NEXT: bgeu a5, t1, .LBB11_25 ; RV32I-NEXT: .LBB11_24: -; RV32I-NEXT: or a7, t3, t6 -; RV32I-NEXT: or t0, t5, t4 +; RV32I-NEXT: or a3, t3, t4 +; RV32I-NEXT: or t0, t6, t5 ; RV32I-NEXT: .LBB11_25: -; RV32I-NEXT: bnez a6, .LBB11_29 +; RV32I-NEXT: bnez a5, .LBB11_29 ; RV32I-NEXT: # %bb.26: -; RV32I-NEXT: bltu a6, t1, .LBB11_28 +; RV32I-NEXT: bltu a5, t1, .LBB11_28 ; RV32I-NEXT: .LBB11_27: -; RV32I-NEXT: srai a0, a5, 31 +; RV32I-NEXT: srai a0, a4, 31 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: .LBB11_28: -; RV32I-NEXT: srli a5, a4, 16 -; RV32I-NEXT: lui a6, 16 -; RV32I-NEXT: srli a7, a4, 24 -; RV32I-NEXT: srli t0, a3, 16 -; RV32I-NEXT: srli t1, a3, 24 -; RV32I-NEXT: srli t2, a0, 16 -; RV32I-NEXT: srli t3, a0, 24 -; RV32I-NEXT: srli t4, a1, 16 -; RV32I-NEXT: srli t5, a1, 24 -; RV32I-NEXT: addi a6, a6, -1 -; RV32I-NEXT: and t6, a4, a6 -; RV32I-NEXT: srli t6, t6, 8 -; RV32I-NEXT: sb a4, 0(a2) -; RV32I-NEXT: sb t6, 1(a2) -; RV32I-NEXT: sb a5, 2(a2) -; RV32I-NEXT: sb a7, 3(a2) -; RV32I-NEXT: and a4, a3, a6 -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a3, 4(a2) -; RV32I-NEXT: sb a4, 5(a2) -; RV32I-NEXT: sb t0, 6(a2) -; RV32I-NEXT: sb t1, 7(a2) -; RV32I-NEXT: and a3, a0, a6 -; RV32I-NEXT: and a4, a1, a6 -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: lui a3, 16 +; RV32I-NEXT: addi a3, a3, -1 +; RV32I-NEXT: srli a4, a7, 16 +; RV32I-NEXT: and a5, a7, a3 +; RV32I-NEXT: srli t0, a7, 24 +; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: sb a7, 0(a2) +; RV32I-NEXT: sb a5, 1(a2) +; RV32I-NEXT: sb a4, 2(a2) +; RV32I-NEXT: sb t0, 3(a2) +; RV32I-NEXT: srli a4, a6, 16 +; RV32I-NEXT: and a5, a6, a3 +; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: srli a7, a6, 24 +; RV32I-NEXT: sb a6, 4(a2) +; RV32I-NEXT: sb a5, 5(a2) +; RV32I-NEXT: sb a4, 6(a2) +; RV32I-NEXT: sb a7, 7(a2) +; RV32I-NEXT: srli a4, a0, 16 +; RV32I-NEXT: and a5, a0, a3 +; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: srli a6, a0, 24 
; RV32I-NEXT: sb a0, 8(a2) -; RV32I-NEXT: sb a3, 9(a2) -; RV32I-NEXT: sb t2, 10(a2) -; RV32I-NEXT: sb t3, 11(a2) +; RV32I-NEXT: sb a5, 9(a2) +; RV32I-NEXT: sb a4, 10(a2) +; RV32I-NEXT: sb a6, 11(a2) +; RV32I-NEXT: srli a0, a1, 16 +; RV32I-NEXT: and a3, a1, a3 +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: srli a4, a1, 24 ; RV32I-NEXT: sb a1, 12(a2) -; RV32I-NEXT: sb a4, 13(a2) -; RV32I-NEXT: sb t4, 14(a2) -; RV32I-NEXT: sb t5, 15(a2) +; RV32I-NEXT: sb a3, 13(a2) +; RV32I-NEXT: sb a0, 14(a2) +; RV32I-NEXT: sb a4, 15(a2) ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB11_29: -; RV32I-NEXT: mv a4, a7 -; RV32I-NEXT: mv a3, t0 -; RV32I-NEXT: bgeu a6, t1, .LBB11_27 +; RV32I-NEXT: mv a7, a3 +; RV32I-NEXT: mv a6, t0 +; RV32I-NEXT: bgeu a5, t1, .LBB11_27 ; RV32I-NEXT: j .LBB11_28 %src = load i128, ptr %src.ptr, align 1 %wordOff = load i128, ptr %wordOff.ptr, align 1 @@ -2677,311 +2649,297 @@ define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: lshr_32bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -96 -; RV64I-NEXT: sd s0, 88(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 80(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 72(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 64(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s5, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s6, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s7, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s8, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s9, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s10, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s11, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: lbu a4, 1(a0) -; RV64I-NEXT: lbu a5, 
2(a0) -; RV64I-NEXT: lbu a6, 3(a0) -; RV64I-NEXT: lbu a7, 4(a0) +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd s0, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a5, 3(a0) +; RV64I-NEXT: lbu a6, 2(a0) +; RV64I-NEXT: lbu a7, 7(a0) ; RV64I-NEXT: lbu t0, 5(a0) ; RV64I-NEXT: lbu t1, 6(a0) -; RV64I-NEXT: lbu t2, 7(a0) -; RV64I-NEXT: lbu t3, 8(a0) -; RV64I-NEXT: lbu t4, 9(a0) -; RV64I-NEXT: lbu t5, 10(a0) -; RV64I-NEXT: lbu t6, 11(a0) -; RV64I-NEXT: lbu s0, 12(a0) -; RV64I-NEXT: lbu s1, 13(a0) -; RV64I-NEXT: lbu s2, 14(a0) -; RV64I-NEXT: lbu s3, 15(a0) -; RV64I-NEXT: lbu s4, 16(a0) -; RV64I-NEXT: lbu s5, 17(a0) -; RV64I-NEXT: lbu s6, 18(a0) -; RV64I-NEXT: lbu s7, 19(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: lbu t2, 4(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a3, a4 +; RV64I-NEXT: or a6, a5, a6 +; RV64I-NEXT: slli a7, a7, 8 ; RV64I-NEXT: slli t0, t0, 8 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: lbu s8, 20(a0) -; RV64I-NEXT: lbu s9, 21(a0) -; RV64I-NEXT: lbu s10, 22(a0) -; RV64I-NEXT: lbu s11, 23(a0) -; RV64I-NEXT: slli t2, t2, 8 +; RV64I-NEXT: or a3, a7, t1 +; RV64I-NEXT: lbu a7, 8(a0) +; RV64I-NEXT: lbu t1, 9(a0) +; RV64I-NEXT: lbu t3, 10(a0) +; RV64I-NEXT: lbu t4, 11(a0) +; RV64I-NEXT: lbu t5, 12(a0) +; RV64I-NEXT: lbu t6, 13(a0) +; RV64I-NEXT: lbu s0, 14(a0) +; RV64I-NEXT: lbu s1, 15(a0) +; RV64I-NEXT: or a5, t0, t2 +; RV64I-NEXT: slli a3, a3, 16 +; RV64I-NEXT: or a5, a3, a5 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or a7, t1, a7 ; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: slli t6, t6, 8 +; RV64I-NEXT: or t0, t4, t3 ; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or t1, 
t2, t1 -; RV64I-NEXT: or a4, t4, t3 -; RV64I-NEXT: or a6, t6, t5 -; RV64I-NEXT: or t0, s1, s0 -; RV64I-NEXT: lbu t5, 24(a0) -; RV64I-NEXT: lbu t6, 25(a0) -; RV64I-NEXT: lbu s0, 26(a0) -; RV64I-NEXT: lbu s1, 27(a0) -; RV64I-NEXT: slli s3, s3, 8 -; RV64I-NEXT: slli s5, s5, 8 -; RV64I-NEXT: slli s7, s7, 8 -; RV64I-NEXT: or t4, s3, s2 -; RV64I-NEXT: or t2, s5, s4 -; RV64I-NEXT: or t3, s7, s6 -; RV64I-NEXT: lbu s2, 28(a0) -; RV64I-NEXT: lbu s3, 29(a0) -; RV64I-NEXT: lbu s4, 30(a0) -; RV64I-NEXT: lbu a0, 31(a0) -; RV64I-NEXT: slli s9, s9, 8 -; RV64I-NEXT: slli s11, s11, 8 ; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s5, s9, s8 -; RV64I-NEXT: or s6, s11, s10 -; RV64I-NEXT: or t5, t6, t5 ; RV64I-NEXT: or s0, s1, s0 -; RV64I-NEXT: lbu t6, 0(a1) -; RV64I-NEXT: lbu s1, 1(a1) -; RV64I-NEXT: lbu s7, 2(a1) -; RV64I-NEXT: lbu s8, 3(a1) -; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: lbu a3, 16(a0) +; RV64I-NEXT: lbu t1, 17(a0) +; RV64I-NEXT: lbu t3, 18(a0) +; RV64I-NEXT: lbu t4, 19(a0) +; RV64I-NEXT: lbu s1, 20(a0) +; RV64I-NEXT: lbu s2, 21(a0) +; RV64I-NEXT: lbu s3, 22(a0) +; RV64I-NEXT: lbu s4, 23(a0) +; RV64I-NEXT: or t2, t6, t5 +; RV64I-NEXT: slli s0, s0, 16 +; RV64I-NEXT: or t2, s0, t2 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or a3, t1, a3 +; RV64I-NEXT: slli t4, t4, 8 +; RV64I-NEXT: or t1, t4, t3 +; RV64I-NEXT: slli s4, s4, 8 +; RV64I-NEXT: slli s2, s2, 8 +; RV64I-NEXT: or t3, s4, s3 +; RV64I-NEXT: or t4, s2, s1 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: or t3, t3, t4 +; RV64I-NEXT: lbu t4, 24(a0) +; RV64I-NEXT: lbu t5, 25(a0) +; RV64I-NEXT: lbu t6, 26(a0) +; RV64I-NEXT: lbu s0, 27(a0) +; RV64I-NEXT: or a3, t1, a3 +; RV64I-NEXT: slli t3, t3, 32 +; RV64I-NEXT: lbu t1, 28(a0) +; RV64I-NEXT: lbu s1, 29(a0) +; RV64I-NEXT: lbu s2, 30(a0) +; RV64I-NEXT: lbu a0, 31(a0) +; RV64I-NEXT: or a3, t3, a3 +; RV64I-NEXT: slli s0, s0, 8 +; RV64I-NEXT: slli t5, t5, 8 +; RV64I-NEXT: or t3, s0, t6 +; RV64I-NEXT: or t4, t5, t4 
+; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: or t3, t3, t4 ; RV64I-NEXT: slli a0, a0, 8 ; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s2, s3, s2 -; RV64I-NEXT: or s3, a0, s4 -; RV64I-NEXT: or t6, s1, t6 -; RV64I-NEXT: lbu a0, 4(a1) -; RV64I-NEXT: lbu s1, 5(a1) -; RV64I-NEXT: lbu s4, 6(a1) +; RV64I-NEXT: or a0, a0, s2 +; RV64I-NEXT: or t1, s1, t1 +; RV64I-NEXT: slli a0, a0, 16 +; RV64I-NEXT: lbu t4, 0(a1) +; RV64I-NEXT: lbu t5, 1(a1) +; RV64I-NEXT: lbu t6, 2(a1) +; RV64I-NEXT: lbu s0, 3(a1) +; RV64I-NEXT: lbu s1, 4(a1) +; RV64I-NEXT: lbu s2, 5(a1) +; RV64I-NEXT: lbu s3, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli s8, s8, 8 -; RV64I-NEXT: or s7, s8, s7 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s1, s1, a0 +; RV64I-NEXT: or t1, a0, t1 +; RV64I-NEXT: slli t5, t5, 8 +; RV64I-NEXT: or t4, t5, t4 +; RV64I-NEXT: slli s0, s0, 8 +; RV64I-NEXT: or t5, s0, t6 +; RV64I-NEXT: slli s2, s2, 8 +; RV64I-NEXT: or t6, s2, s1 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or s4, a1, s4 -; RV64I-NEXT: slli t1, t1, 16 -; RV64I-NEXT: or a1, t1, a7 -; RV64I-NEXT: slli t4, t4, 16 -; RV64I-NEXT: or a0, t4, t0 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: or t0, t3, t2 -; RV64I-NEXT: slli s6, s6, 16 -; RV64I-NEXT: or t1, s6, s5 +; RV64I-NEXT: or s0, a1, s3 +; RV64I-NEXT: slli a1, a6, 16 +; RV64I-NEXT: slli a0, t0, 16 ; RV64I-NEXT: slli s0, s0, 16 -; RV64I-NEXT: or t3, s0, t5 -; RV64I-NEXT: slli s3, s3, 16 -; RV64I-NEXT: or t5, s3, s2 -; RV64I-NEXT: slli s7, s7, 16 -; RV64I-NEXT: or t6, s7, t6 -; RV64I-NEXT: slli s4, s4, 16 -; RV64I-NEXT: or s0, s4, s1 -; RV64I-NEXT: li a7, 64 -; RV64I-NEXT: slli t4, a5, 16 -; RV64I-NEXT: slli t2, a6, 16 +; RV64I-NEXT: slli t5, t5, 16 +; RV64I-NEXT: or a6, s0, t6 +; RV64I-NEXT: or t0, t5, t4 +; RV64I-NEXT: slli a6, a6, 32 ; RV64I-NEXT: slli t1, t1, 32 -; RV64I-NEXT: slli t5, t5, 32 -; RV64I-NEXT: slli s0, s0, 32 -; RV64I-NEXT: or a6, t1, t0 -; RV64I-NEXT: or t0, t5, t3 -; RV64I-NEXT: or a5, s0, t6 -; RV64I-NEXT: slli a5, a5, 3 -; RV64I-NEXT: sub 
t1, a5, a7 -; RV64I-NEXT: neg t5, a5 -; RV64I-NEXT: sll t3, t0, t5 -; RV64I-NEXT: bltu a5, a7, .LBB12_2 +; RV64I-NEXT: or a6, a6, t0 +; RV64I-NEXT: or t0, t1, t3 +; RV64I-NEXT: slli a6, a6, 3 +; RV64I-NEXT: li t1, 64 +; RV64I-NEXT: neg t3, a6 +; RV64I-NEXT: sub t5, a6, t1 +; RV64I-NEXT: sll t4, t0, t3 +; RV64I-NEXT: bltu a6, t1, .LBB12_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: srl t6, t0, t1 +; RV64I-NEXT: srl t6, t0, t5 ; RV64I-NEXT: j .LBB12_3 ; RV64I-NEXT: .LBB12_2: -; RV64I-NEXT: srl t6, a6, a5 -; RV64I-NEXT: or t6, t6, t3 +; RV64I-NEXT: srl t6, a3, a6 +; RV64I-NEXT: or t6, t6, t4 ; RV64I-NEXT: .LBB12_3: -; RV64I-NEXT: or a3, t4, a3 -; RV64I-NEXT: slli t4, a1, 32 -; RV64I-NEXT: or t2, t2, a4 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: mv a1, a6 -; RV64I-NEXT: beqz a5, .LBB12_5 +; RV64I-NEXT: or a4, a1, a4 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: slli t2, t2, 32 +; RV64I-NEXT: mv a1, a3 +; RV64I-NEXT: beqz a6, .LBB12_5 ; RV64I-NEXT: # %bb.4: ; RV64I-NEXT: mv a1, t6 ; RV64I-NEXT: .LBB12_5: -; RV64I-NEXT: or a4, t4, a3 -; RV64I-NEXT: or a3, a0, t2 -; RV64I-NEXT: bltu a5, a7, .LBB12_7 +; RV64I-NEXT: or a5, a5, a4 +; RV64I-NEXT: or a4, t2, a0 +; RV64I-NEXT: bltu a6, t1, .LBB12_7 ; RV64I-NEXT: # %bb.6: ; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: srl t4, a3, t1 +; RV64I-NEXT: srl t5, a4, t5 ; RV64I-NEXT: j .LBB12_8 ; RV64I-NEXT: .LBB12_7: -; RV64I-NEXT: srl a0, t0, a5 -; RV64I-NEXT: srl t1, a4, a5 -; RV64I-NEXT: sll t2, a3, t5 -; RV64I-NEXT: or t4, t1, t2 +; RV64I-NEXT: srl a7, a5, a6 +; RV64I-NEXT: sll t2, a4, t3 +; RV64I-NEXT: srl a0, t0, a6 +; RV64I-NEXT: or t5, a7, t2 ; RV64I-NEXT: .LBB12_8: -; RV64I-NEXT: li t1, 128 -; RV64I-NEXT: mv t2, a4 -; RV64I-NEXT: beqz a5, .LBB12_10 +; RV64I-NEXT: li a7, 128 +; RV64I-NEXT: mv t2, a5 +; RV64I-NEXT: beqz a6, .LBB12_10 ; RV64I-NEXT: # %bb.9: -; RV64I-NEXT: mv t2, t4 +; RV64I-NEXT: mv t2, t5 ; RV64I-NEXT: .LBB12_10: -; RV64I-NEXT: sub t6, t1, a5 -; RV64I-NEXT: bltu a5, a7, .LBB12_13 +; RV64I-NEXT: 
sub t6, a7, a6 +; RV64I-NEXT: bltu a6, t1, .LBB12_13 ; RV64I-NEXT: # %bb.11: -; RV64I-NEXT: li t4, 0 -; RV64I-NEXT: bgeu t6, a7, .LBB12_14 +; RV64I-NEXT: li t5, 0 +; RV64I-NEXT: bgeu t6, t1, .LBB12_14 ; RV64I-NEXT: .LBB12_12: -; RV64I-NEXT: sll t5, a6, t5 ; RV64I-NEXT: neg s0, t6 -; RV64I-NEXT: srl s0, a6, s0 -; RV64I-NEXT: or s1, s0, t3 +; RV64I-NEXT: srl s0, a3, s0 +; RV64I-NEXT: sll t3, a3, t3 +; RV64I-NEXT: or s1, s0, t4 ; RV64I-NEXT: j .LBB12_15 ; RV64I-NEXT: .LBB12_13: -; RV64I-NEXT: srl t4, a3, a5 -; RV64I-NEXT: bltu t6, a7, .LBB12_12 +; RV64I-NEXT: srl t5, a4, a6 +; RV64I-NEXT: bltu t6, t1, .LBB12_12 ; RV64I-NEXT: .LBB12_14: -; RV64I-NEXT: li t5, 0 -; RV64I-NEXT: sub t3, t6, a7 -; RV64I-NEXT: sll s1, a6, t3 +; RV64I-NEXT: li t3, 0 +; RV64I-NEXT: sub t4, t6, t1 +; RV64I-NEXT: sll s1, a3, t4 ; RV64I-NEXT: .LBB12_15: -; RV64I-NEXT: sub s0, a5, t1 -; RV64I-NEXT: mv t3, t0 +; RV64I-NEXT: sub s0, a6, a7 +; RV64I-NEXT: mv t4, t0 ; RV64I-NEXT: beqz t6, .LBB12_17 ; RV64I-NEXT: # %bb.16: -; RV64I-NEXT: mv t3, s1 +; RV64I-NEXT: mv t4, s1 ; RV64I-NEXT: .LBB12_17: -; RV64I-NEXT: bltu s0, a7, .LBB12_19 +; RV64I-NEXT: bltu s0, t1, .LBB12_19 ; RV64I-NEXT: # %bb.18: -; RV64I-NEXT: sub t6, s0, a7 +; RV64I-NEXT: sub t6, s0, t1 ; RV64I-NEXT: srl t6, t0, t6 ; RV64I-NEXT: bnez s0, .LBB12_20 ; RV64I-NEXT: j .LBB12_21 ; RV64I-NEXT: .LBB12_19: -; RV64I-NEXT: srl t6, a6, s0 -; RV64I-NEXT: neg s1, s0 -; RV64I-NEXT: sll s1, t0, s1 -; RV64I-NEXT: or t6, t6, s1 +; RV64I-NEXT: neg t6, s0 +; RV64I-NEXT: srl s1, a3, s0 +; RV64I-NEXT: sll t6, t0, t6 +; RV64I-NEXT: or t6, s1, t6 ; RV64I-NEXT: beqz s0, .LBB12_21 ; RV64I-NEXT: .LBB12_20: -; RV64I-NEXT: mv a6, t6 +; RV64I-NEXT: mv a3, t6 ; RV64I-NEXT: .LBB12_21: -; RV64I-NEXT: bltu s0, a7, .LBB12_23 +; RV64I-NEXT: bltu s0, t1, .LBB12_23 ; RV64I-NEXT: # %bb.22: -; RV64I-NEXT: li a7, 0 -; RV64I-NEXT: bltu a5, t1, .LBB12_24 +; RV64I-NEXT: li t0, 0 +; RV64I-NEXT: bltu a6, a7, .LBB12_24 ; RV64I-NEXT: j .LBB12_25 ; RV64I-NEXT: .LBB12_23: -; 
RV64I-NEXT: srl a7, t0, s0 -; RV64I-NEXT: bgeu a5, t1, .LBB12_25 +; RV64I-NEXT: srl t0, t0, s0 +; RV64I-NEXT: bgeu a6, a7, .LBB12_25 ; RV64I-NEXT: .LBB12_24: -; RV64I-NEXT: or a6, t2, t5 -; RV64I-NEXT: or a7, t4, t3 +; RV64I-NEXT: or a3, t2, t3 +; RV64I-NEXT: or t0, t5, t4 ; RV64I-NEXT: .LBB12_25: -; RV64I-NEXT: bnez a5, .LBB12_29 +; RV64I-NEXT: bnez a6, .LBB12_29 ; RV64I-NEXT: # %bb.26: -; RV64I-NEXT: bltu a5, t1, .LBB12_28 +; RV64I-NEXT: bltu a6, a7, .LBB12_28 ; RV64I-NEXT: .LBB12_27: ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: li a0, 0 ; RV64I-NEXT: .LBB12_28: +; RV64I-NEXT: srli a6, a5, 32 +; RV64I-NEXT: lui a3, 16 +; RV64I-NEXT: srliw a7, a5, 16 +; RV64I-NEXT: addi a3, a3, -1 +; RV64I-NEXT: srliw t0, a5, 24 +; RV64I-NEXT: and t1, a5, a3 +; RV64I-NEXT: srli t1, t1, 8 +; RV64I-NEXT: sb a5, 0(a2) +; RV64I-NEXT: sb t1, 1(a2) +; RV64I-NEXT: sb a7, 2(a2) +; RV64I-NEXT: sb t0, 3(a2) +; RV64I-NEXT: and a7, a6, a3 +; RV64I-NEXT: srli t0, a5, 48 +; RV64I-NEXT: srli a7, a7, 8 +; RV64I-NEXT: srli a5, a5, 56 +; RV64I-NEXT: sb a6, 4(a2) +; RV64I-NEXT: sb a7, 5(a2) +; RV64I-NEXT: sb t0, 6(a2) +; RV64I-NEXT: sb a5, 7(a2) ; RV64I-NEXT: srli a5, a4, 32 ; RV64I-NEXT: srliw a6, a4, 16 -; RV64I-NEXT: lui t2, 16 -; RV64I-NEXT: srliw t1, a4, 24 -; RV64I-NEXT: srli t0, a4, 48 -; RV64I-NEXT: srli t5, a4, 56 -; RV64I-NEXT: srli a7, a3, 32 -; RV64I-NEXT: srliw t4, a3, 16 -; RV64I-NEXT: srliw s0, a3, 24 -; RV64I-NEXT: srli t6, a3, 48 -; RV64I-NEXT: srli s3, a3, 56 -; RV64I-NEXT: srli t3, a1, 32 -; RV64I-NEXT: srliw s2, a1, 16 -; RV64I-NEXT: srliw s6, a1, 24 -; RV64I-NEXT: srli s4, a1, 48 -; RV64I-NEXT: srli s7, a1, 56 -; RV64I-NEXT: srli s1, a0, 32 -; RV64I-NEXT: srliw s5, a0, 16 -; RV64I-NEXT: srliw s8, a0, 24 -; RV64I-NEXT: srli s9, a0, 48 -; RV64I-NEXT: srli s10, a0, 56 -; RV64I-NEXT: addi t2, t2, -1 -; RV64I-NEXT: and s11, a4, t2 -; RV64I-NEXT: srli s11, s11, 8 -; RV64I-NEXT: sb a4, 0(a2) -; RV64I-NEXT: sb s11, 1(a2) -; RV64I-NEXT: sb a6, 2(a2) -; RV64I-NEXT: sb t1, 3(a2) -; RV64I-NEXT: and 
a4, a5, t2 -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a5, 4(a2) -; RV64I-NEXT: sb a4, 5(a2) -; RV64I-NEXT: sb t0, 6(a2) -; RV64I-NEXT: sb t5, 7(a2) -; RV64I-NEXT: and a4, a3, t2 -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a3, 8(a2) -; RV64I-NEXT: sb a4, 9(a2) -; RV64I-NEXT: sb t4, 10(a2) -; RV64I-NEXT: sb s0, 11(a2) -; RV64I-NEXT: and a3, a7, t2 -; RV64I-NEXT: srli a3, a3, 8 -; RV64I-NEXT: sb a7, 12(a2) -; RV64I-NEXT: sb a3, 13(a2) -; RV64I-NEXT: sb t6, 14(a2) -; RV64I-NEXT: sb s3, 15(a2) -; RV64I-NEXT: and a3, a1, t2 -; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: and a7, a4, a3 +; RV64I-NEXT: srli a7, a7, 8 +; RV64I-NEXT: srliw t0, a4, 24 +; RV64I-NEXT: sb a4, 8(a2) +; RV64I-NEXT: sb a7, 9(a2) +; RV64I-NEXT: sb a6, 10(a2) +; RV64I-NEXT: sb t0, 11(a2) +; RV64I-NEXT: srli a6, a4, 48 +; RV64I-NEXT: and a7, a5, a3 +; RV64I-NEXT: srli a7, a7, 8 +; RV64I-NEXT: srli a4, a4, 56 +; RV64I-NEXT: sb a5, 12(a2) +; RV64I-NEXT: sb a7, 13(a2) +; RV64I-NEXT: sb a6, 14(a2) +; RV64I-NEXT: sb a4, 15(a2) +; RV64I-NEXT: srli a4, a1, 32 +; RV64I-NEXT: and a5, a1, a3 +; RV64I-NEXT: srliw a6, a1, 16 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srliw a7, a1, 24 ; RV64I-NEXT: sb a1, 16(a2) -; RV64I-NEXT: sb a3, 17(a2) -; RV64I-NEXT: sb s2, 18(a2) -; RV64I-NEXT: sb s6, 19(a2) -; RV64I-NEXT: and a1, t3, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb t3, 20(a2) -; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: sb s4, 22(a2) -; RV64I-NEXT: sb s7, 23(a2) -; RV64I-NEXT: and a1, a0, t2 -; RV64I-NEXT: and a3, s1, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: sb a5, 17(a2) +; RV64I-NEXT: sb a6, 18(a2) +; RV64I-NEXT: sb a7, 19(a2) +; RV64I-NEXT: and a5, a4, a3 +; RV64I-NEXT: srli a6, a1, 48 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srli a1, a1, 56 +; RV64I-NEXT: sb a4, 20(a2) +; RV64I-NEXT: sb a5, 21(a2) +; RV64I-NEXT: sb a6, 22(a2) +; RV64I-NEXT: sb a1, 23(a2) +; RV64I-NEXT: srli a1, a0, 32 +; RV64I-NEXT: srliw a4, a0, 16 +; RV64I-NEXT: and a5, a0, a3 +; 
RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srliw a6, a0, 24 ; RV64I-NEXT: sb a0, 24(a2) -; RV64I-NEXT: sb a1, 25(a2) -; RV64I-NEXT: sb s5, 26(a2) -; RV64I-NEXT: sb s8, 27(a2) -; RV64I-NEXT: sb s1, 28(a2) +; RV64I-NEXT: sb a5, 25(a2) +; RV64I-NEXT: sb a4, 26(a2) +; RV64I-NEXT: sb a6, 27(a2) +; RV64I-NEXT: srli a4, a0, 48 +; RV64I-NEXT: and a3, a1, a3 +; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: sb a1, 28(a2) ; RV64I-NEXT: sb a3, 29(a2) -; RV64I-NEXT: sb s9, 30(a2) -; RV64I-NEXT: sb s10, 31(a2) -; RV64I-NEXT: ld s0, 88(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 80(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 72(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 64(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 56(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s5, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s6, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s7, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s8, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s9, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s10, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s11, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 96 +; RV64I-NEXT: sb a4, 30(a2) +; RV64I-NEXT: sb a0, 31(a2) +; RV64I-NEXT: ld s0, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB12_29: -; RV64I-NEXT: mv a4, a6 -; RV64I-NEXT: mv a3, a7 -; RV64I-NEXT: bgeu a5, t1, .LBB12_27 +; RV64I-NEXT: mv a5, a3 +; RV64I-NEXT: mv a4, t0 +; RV64I-NEXT: bgeu a6, a7, .LBB12_27 ; RV64I-NEXT: j .LBB12_28 ; ; RV32I-LABEL: lshr_32bytes: @@ -3000,749 +2958,758 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sw s9, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s10, 16(sp) # 4-byte 
Folded Spill ; RV32I-NEXT: sw s11, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: lbu a5, 1(a0) -; RV32I-NEXT: lbu a6, 2(a0) -; RV32I-NEXT: lbu a7, 3(a0) -; RV32I-NEXT: lbu t0, 4(a0) -; RV32I-NEXT: lbu t1, 5(a0) -; RV32I-NEXT: lbu t2, 6(a0) -; RV32I-NEXT: lbu t3, 7(a0) -; RV32I-NEXT: lbu t4, 8(a0) -; RV32I-NEXT: lbu t5, 9(a0) +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 3(a0) +; RV32I-NEXT: lbu a5, 0(a0) +; RV32I-NEXT: lbu a7, 2(a0) +; RV32I-NEXT: lbu t0, 5(a0) +; RV32I-NEXT: lbu t1, 7(a0) +; RV32I-NEXT: lbu t2, 4(a0) +; RV32I-NEXT: lbu t3, 6(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a3, a3, a5 +; RV32I-NEXT: or a5, a4, a7 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or a4, t0, t2 +; RV32I-NEXT: or a7, t1, t3 +; RV32I-NEXT: lbu t0, 9(a0) +; RV32I-NEXT: lbu t3, 0(a1) +; RV32I-NEXT: lbu t4, 1(a1) +; RV32I-NEXT: lbu t1, 2(a1) +; RV32I-NEXT: lbu t2, 3(a1) +; RV32I-NEXT: lbu t5, 8(a0) ; RV32I-NEXT: lbu t6, 10(a0) ; RV32I-NEXT: lbu s0, 11(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or a3, a5, a3 -; RV32I-NEXT: or a7, a7, a6 -; RV32I-NEXT: or t1, t1, t0 -; RV32I-NEXT: lbu a6, 13(a0) -; RV32I-NEXT: lbu a5, 14(a0) -; RV32I-NEXT: lbu s1, 15(a0) -; RV32I-NEXT: slli t3, t3, 8 -; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: or t3, t3, t2 -; RV32I-NEXT: or t0, t5, t4 -; RV32I-NEXT: or t5, s0, t6 -; RV32I-NEXT: lbu t2, 1(a1) -; RV32I-NEXT: lbu t4, 0(a1) -; RV32I-NEXT: lbu t6, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a1, a7, 16 +; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or t0, t0, t5 ; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or s0, t2, t4 -; RV32I-NEXT: slli t2, s1, 8 -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a1, t6 -; RV32I-NEXT: slli t4, a7, 16 -; 
RV32I-NEXT: slli a7, t3, 16 -; RV32I-NEXT: slli t3, t5, 16 -; RV32I-NEXT: slli t5, a1, 16 -; RV32I-NEXT: or a1, a7, t1 -; RV32I-NEXT: or a7, t5, s0 -; RV32I-NEXT: slli a7, a7, 3 -; RV32I-NEXT: srli t1, a7, 5 -; RV32I-NEXT: andi t5, a7, 31 +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: or a4, t2, t1 +; RV32I-NEXT: lbu t2, 13(a0) +; RV32I-NEXT: lbu t1, 14(a0) +; RV32I-NEXT: lbu a7, 15(a0) +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: or t3, t4, t3 +; RV32I-NEXT: slli a4, a4, 16 +; RV32I-NEXT: or t4, s0, t6 +; RV32I-NEXT: or a4, a4, t3 +; RV32I-NEXT: slli t3, t4, 16 +; RV32I-NEXT: slli a4, a4, 3 +; RV32I-NEXT: slli s0, a7, 8 +; RV32I-NEXT: andi t5, a4, 31 +; RV32I-NEXT: srli a7, a4, 5 ; RV32I-NEXT: neg s3, t5 ; RV32I-NEXT: beqz t5, .LBB12_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sll a4, a1, s3 +; RV32I-NEXT: sll a6, a1, s3 ; RV32I-NEXT: .LBB12_2: -; RV32I-NEXT: or s7, t4, a3 ; RV32I-NEXT: lbu t4, 12(a0) ; RV32I-NEXT: lbu t6, 19(a0) -; RV32I-NEXT: slli s1, a6, 8 -; RV32I-NEXT: or a5, t2, a5 +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t1, s0, t1 ; RV32I-NEXT: or a3, t3, t0 -; RV32I-NEXT: beqz t1, .LBB12_4 +; RV32I-NEXT: sw a5, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: beqz a7, .LBB12_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: li s0, 0 ; RV32I-NEXT: j .LBB12_5 ; RV32I-NEXT: .LBB12_4: -; RV32I-NEXT: srl s0, s7, a7 -; RV32I-NEXT: or s0, s0, a4 +; RV32I-NEXT: srl t0, a5, a4 +; RV32I-NEXT: or s0, t0, a6 ; RV32I-NEXT: .LBB12_5: -; RV32I-NEXT: li a6, 0 -; RV32I-NEXT: lbu t0, 17(a0) -; RV32I-NEXT: lbu a4, 18(a0) -; RV32I-NEXT: slli s4, t6, 8 -; RV32I-NEXT: or s2, s1, t4 -; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: lbu s2, 17(a0) +; RV32I-NEXT: lbu a6, 18(a0) +; RV32I-NEXT: slli s1, t6, 8 +; RV32I-NEXT: or s4, t2, t4 +; RV32I-NEXT: slli t1, t1, 16 ; RV32I-NEXT: li s5, 1 ; RV32I-NEXT: sll t6, a3, s3 ; RV32I-NEXT: beqz t5, .LBB12_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: mv a6, t6 +; RV32I-NEXT: mv t0, t6 ; RV32I-NEXT: .LBB12_7: ; 
RV32I-NEXT: lbu t2, 16(a0) ; RV32I-NEXT: lbu t3, 23(a0) -; RV32I-NEXT: slli s1, t0, 8 -; RV32I-NEXT: or t4, s4, a4 -; RV32I-NEXT: srl a4, a1, a7 -; RV32I-NEXT: or a5, a5, s2 -; RV32I-NEXT: bne t1, s5, .LBB12_9 +; RV32I-NEXT: slli s2, s2, 8 +; RV32I-NEXT: or t4, s1, a6 +; RV32I-NEXT: srl s1, a1, a4 +; RV32I-NEXT: or a6, t1, s4 +; RV32I-NEXT: bne a7, s5, .LBB12_9 ; RV32I-NEXT: # %bb.8: -; RV32I-NEXT: or s0, a4, a6 +; RV32I-NEXT: or s0, s1, t0 ; RV32I-NEXT: .LBB12_9: -; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: lbu s5, 21(a0) -; RV32I-NEXT: lbu a6, 22(a0) -; RV32I-NEXT: slli s4, t3, 8 -; RV32I-NEXT: or t2, s1, t2 -; RV32I-NEXT: slli s6, t4, 16 +; RV32I-NEXT: li t1, 0 +; RV32I-NEXT: lbu s6, 21(a0) +; RV32I-NEXT: lbu t0, 22(a0) +; RV32I-NEXT: slli s5, t3, 8 +; RV32I-NEXT: or t2, s2, t2 +; RV32I-NEXT: slli s7, t4, 16 ; RV32I-NEXT: li s8, 2 -; RV32I-NEXT: sll t3, a5, s3 +; RV32I-NEXT: sll t3, a6, s3 ; RV32I-NEXT: beqz t5, .LBB12_11 ; RV32I-NEXT: # %bb.10: -; RV32I-NEXT: mv t0, t3 +; RV32I-NEXT: mv t1, t3 ; RV32I-NEXT: .LBB12_11: -; RV32I-NEXT: lbu s1, 20(a0) -; RV32I-NEXT: lbu s2, 27(a0) -; RV32I-NEXT: slli s5, s5, 8 -; RV32I-NEXT: or s4, s4, a6 -; RV32I-NEXT: srl t4, a3, a7 -; RV32I-NEXT: or a6, s6, t2 -; RV32I-NEXT: bne t1, s8, .LBB12_13 +; RV32I-NEXT: lbu s2, 20(a0) +; RV32I-NEXT: lbu s4, 27(a0) +; RV32I-NEXT: slli s6, s6, 8 +; RV32I-NEXT: or s5, s5, t0 +; RV32I-NEXT: srl t4, a3, a4 +; RV32I-NEXT: or t0, s7, t2 +; RV32I-NEXT: bne a7, s8, .LBB12_13 ; RV32I-NEXT: # %bb.12: -; RV32I-NEXT: or s0, t4, t0 +; RV32I-NEXT: or s0, t4, t1 ; RV32I-NEXT: .LBB12_13: -; RV32I-NEXT: sw s7, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: li t2, 0 -; RV32I-NEXT: lbu s6, 25(a0) -; RV32I-NEXT: lbu t0, 26(a0) -; RV32I-NEXT: slli s8, s2, 8 -; RV32I-NEXT: or s7, s5, s1 -; RV32I-NEXT: slli s9, s4, 16 -; RV32I-NEXT: sll s11, a6, s3 +; RV32I-NEXT: lbu s7, 25(a0) +; RV32I-NEXT: lbu t1, 26(a0) +; RV32I-NEXT: slli s9, s4, 8 +; RV32I-NEXT: or s8, s6, s2 +; RV32I-NEXT: slli s10, s5, 16 +; RV32I-NEXT: sll s11, 
t0, s3 ; RV32I-NEXT: beqz t5, .LBB12_15 ; RV32I-NEXT: # %bb.14: ; RV32I-NEXT: mv t2, s11 ; RV32I-NEXT: .LBB12_15: -; RV32I-NEXT: lbu s1, 24(a0) -; RV32I-NEXT: lbu s2, 31(a0) -; RV32I-NEXT: slli s5, s6, 8 -; RV32I-NEXT: or s4, s8, t0 -; RV32I-NEXT: srl ra, a5, a7 -; RV32I-NEXT: or t0, s9, s7 -; RV32I-NEXT: li s6, 3 -; RV32I-NEXT: bne t1, s6, .LBB12_17 +; RV32I-NEXT: lbu s2, 24(a0) +; RV32I-NEXT: lbu s4, 31(a0) +; RV32I-NEXT: slli s6, s7, 8 +; RV32I-NEXT: or s5, s9, t1 +; RV32I-NEXT: srl ra, a6, a4 +; RV32I-NEXT: or t1, s10, s8 +; RV32I-NEXT: li a5, 3 +; RV32I-NEXT: bne a7, a5, .LBB12_17 ; RV32I-NEXT: # %bb.16: ; RV32I-NEXT: or s0, ra, t2 ; RV32I-NEXT: .LBB12_17: ; RV32I-NEXT: li t2, 0 -; RV32I-NEXT: lbu s7, 29(a0) -; RV32I-NEXT: lbu s6, 30(a0) -; RV32I-NEXT: slli s8, s2, 8 -; RV32I-NEXT: or s2, s5, s1 -; RV32I-NEXT: slli s5, s4, 16 -; RV32I-NEXT: li s9, 4 -; RV32I-NEXT: sll s1, t0, s3 -; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu s8, 29(a0) +; RV32I-NEXT: lbu s7, 30(a0) +; RV32I-NEXT: slli s10, s4, 8 +; RV32I-NEXT: or s4, s6, s2 +; RV32I-NEXT: slli s6, s5, 16 +; RV32I-NEXT: li a5, 4 +; RV32I-NEXT: sll s2, t1, s3 +; RV32I-NEXT: sw s2, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: beqz t5, .LBB12_19 ; RV32I-NEXT: # %bb.18: ; RV32I-NEXT: lw t2, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB12_19: -; RV32I-NEXT: lbu s1, 28(a0) -; RV32I-NEXT: slli s7, s7, 8 -; RV32I-NEXT: or s4, s8, s6 -; RV32I-NEXT: srl s10, a6, a7 -; RV32I-NEXT: or a0, s5, s2 -; RV32I-NEXT: bne t1, s9, .LBB12_21 +; RV32I-NEXT: lbu s2, 28(a0) +; RV32I-NEXT: slli s8, s8, 8 +; RV32I-NEXT: or s5, s10, s7 +; RV32I-NEXT: srl s10, t0, a4 +; RV32I-NEXT: or a0, s6, s4 +; RV32I-NEXT: bne a7, a5, .LBB12_21 ; RV32I-NEXT: # %bb.20: ; RV32I-NEXT: or s0, s10, t2 ; RV32I-NEXT: .LBB12_21: -; RV32I-NEXT: li s2, 0 -; RV32I-NEXT: or t2, s7, s1 -; RV32I-NEXT: slli s4, s4, 16 -; RV32I-NEXT: li s9, 5 +; RV32I-NEXT: li s4, 0 +; RV32I-NEXT: or t2, s8, s2 +; RV32I-NEXT: slli s5, s5, 16 +; RV32I-NEXT: li s8, 
5 ; RV32I-NEXT: sll s7, a0, s3 ; RV32I-NEXT: beqz t5, .LBB12_23 ; RV32I-NEXT: # %bb.22: -; RV32I-NEXT: mv s2, s7 +; RV32I-NEXT: mv s4, s7 ; RV32I-NEXT: .LBB12_23: -; RV32I-NEXT: srl s8, t0, a7 -; RV32I-NEXT: or t2, s4, t2 -; RV32I-NEXT: bne t1, s9, .LBB12_25 +; RV32I-NEXT: srl a5, t1, a4 +; RV32I-NEXT: or t2, s5, t2 +; RV32I-NEXT: beq a7, s8, .LBB12_25 ; RV32I-NEXT: # %bb.24: -; RV32I-NEXT: or s0, s8, s2 +; RV32I-NEXT: mv s9, a5 +; RV32I-NEXT: j .LBB12_26 ; RV32I-NEXT: .LBB12_25: +; RV32I-NEXT: mv s9, a5 +; RV32I-NEXT: or s0, a5, s4 +; RV32I-NEXT: .LBB12_26: ; RV32I-NEXT: li s4, 0 -; RV32I-NEXT: li s2, 6 +; RV32I-NEXT: li a5, 6 ; RV32I-NEXT: sll s5, t2, s3 -; RV32I-NEXT: beqz t5, .LBB12_27 -; RV32I-NEXT: # %bb.26: +; RV32I-NEXT: beqz t5, .LBB12_28 +; RV32I-NEXT: # %bb.27: ; RV32I-NEXT: mv s4, s5 -; RV32I-NEXT: .LBB12_27: -; RV32I-NEXT: srl s6, a0, a7 -; RV32I-NEXT: bne t1, s2, .LBB12_29 -; RV32I-NEXT: # %bb.28: +; RV32I-NEXT: .LBB12_28: +; RV32I-NEXT: srl s6, a0, a4 +; RV32I-NEXT: bne a7, a5, .LBB12_30 +; RV32I-NEXT: # %bb.29: ; RV32I-NEXT: or s0, s6, s4 -; RV32I-NEXT: .LBB12_29: +; RV32I-NEXT: .LBB12_30: ; RV32I-NEXT: li s3, 7 -; RV32I-NEXT: srl s1, t2, a7 -; RV32I-NEXT: mv s4, s1 -; RV32I-NEXT: bne t1, s3, .LBB12_34 -; RV32I-NEXT: # %bb.30: -; RV32I-NEXT: bnez a7, .LBB12_35 -; RV32I-NEXT: .LBB12_31: -; RV32I-NEXT: li s0, 0 -; RV32I-NEXT: bnez t5, .LBB12_36 +; RV32I-NEXT: srl s2, t2, a4 +; RV32I-NEXT: mv s4, s2 +; RV32I-NEXT: bne a7, s3, .LBB12_35 +; RV32I-NEXT: # %bb.31: +; RV32I-NEXT: bnez a4, .LBB12_36 ; RV32I-NEXT: .LBB12_32: -; RV32I-NEXT: li s4, 2 -; RV32I-NEXT: beqz t1, .LBB12_37 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t5, .LBB12_37 ; RV32I-NEXT: .LBB12_33: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: j .LBB12_38 +; RV32I-NEXT: li s4, 2 +; RV32I-NEXT: beqz a7, .LBB12_38 ; RV32I-NEXT: .LBB12_34: -; RV32I-NEXT: mv s4, s0 -; RV32I-NEXT: beqz a7, .LBB12_31 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: j .LBB12_39 ; RV32I-NEXT: .LBB12_35: +; RV32I-NEXT: mv s4, s0 +; 
RV32I-NEXT: beqz a4, .LBB12_32 +; RV32I-NEXT: .LBB12_36: ; RV32I-NEXT: sw s4, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: li s0, 0 -; RV32I-NEXT: beqz t5, .LBB12_32 -; RV32I-NEXT: .LBB12_36: +; RV32I-NEXT: beqz t5, .LBB12_33 +; RV32I-NEXT: .LBB12_37: ; RV32I-NEXT: mv s0, t6 ; RV32I-NEXT: li s4, 2 -; RV32I-NEXT: bnez t1, .LBB12_33 -; RV32I-NEXT: .LBB12_37: -; RV32I-NEXT: or a4, a4, s0 +; RV32I-NEXT: bnez a7, .LBB12_34 ; RV32I-NEXT: .LBB12_38: -; RV32I-NEXT: li s0, 1 -; RV32I-NEXT: li t6, 0 -; RV32I-NEXT: bnez t5, .LBB12_57 -; RV32I-NEXT: # %bb.39: -; RV32I-NEXT: beq t1, s0, .LBB12_58 -; RV32I-NEXT: .LBB12_40: -; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: or t6, s1, s0 +; RV32I-NEXT: .LBB12_39: +; RV32I-NEXT: li s1, 1 +; RV32I-NEXT: li s0, 0 ; RV32I-NEXT: bnez t5, .LBB12_59 +; RV32I-NEXT: # %bb.40: +; RV32I-NEXT: beq a7, s1, .LBB12_60 ; RV32I-NEXT: .LBB12_41: -; RV32I-NEXT: beq t1, s4, .LBB12_60 -; RV32I-NEXT: .LBB12_42: -; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: li s0, 0 ; RV32I-NEXT: bnez t5, .LBB12_61 +; RV32I-NEXT: .LBB12_42: +; RV32I-NEXT: beq a7, s4, .LBB12_62 ; RV32I-NEXT: .LBB12_43: -; RV32I-NEXT: li s4, 3 -; RV32I-NEXT: bne t1, s4, .LBB12_45 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t5, .LBB12_63 ; RV32I-NEXT: .LBB12_44: -; RV32I-NEXT: or a4, s10, t6 +; RV32I-NEXT: li a5, 3 +; RV32I-NEXT: bne a7, a5, .LBB12_46 ; RV32I-NEXT: .LBB12_45: -; RV32I-NEXT: li t6, 0 -; RV32I-NEXT: li s4, 4 -; RV32I-NEXT: bnez t5, .LBB12_62 -; RV32I-NEXT: # %bb.46: -; RV32I-NEXT: beq t1, s4, .LBB12_63 -; RV32I-NEXT: .LBB12_47: -; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: or t6, s10, s0 +; RV32I-NEXT: .LBB12_46: +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: li a5, 4 ; RV32I-NEXT: bnez t5, .LBB12_64 +; RV32I-NEXT: # %bb.47: +; RV32I-NEXT: beq a7, a5, .LBB12_65 ; RV32I-NEXT: .LBB12_48: -; RV32I-NEXT: beq t1, s9, .LBB12_65 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t5, .LBB12_66 ; RV32I-NEXT: .LBB12_49: -; RV32I-NEXT: mv t6, s1 -; RV32I-NEXT: bne t1, s2, .LBB12_66 +; RV32I-NEXT: bne a7, s8, .LBB12_51 ; 
RV32I-NEXT: .LBB12_50: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s3, .LBB12_67 +; RV32I-NEXT: or t6, s6, s0 ; RV32I-NEXT: .LBB12_51: -; RV32I-NEXT: beqz a7, .LBB12_53 -; RV32I-NEXT: .LBB12_52: -; RV32I-NEXT: mv a1, a4 +; RV32I-NEXT: mv s0, s2 +; RV32I-NEXT: li s8, 6 +; RV32I-NEXT: bne a7, s8, .LBB12_67 +; RV32I-NEXT: # %bb.52: +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: bne a7, s3, .LBB12_68 ; RV32I-NEXT: .LBB12_53: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: li t6, 2 -; RV32I-NEXT: beqz t5, .LBB12_55 -; RV32I-NEXT: # %bb.54: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: beqz a4, .LBB12_55 +; RV32I-NEXT: .LBB12_54: +; RV32I-NEXT: mv a1, t6 ; RV32I-NEXT: .LBB12_55: -; RV32I-NEXT: beqz t1, .LBB12_68 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: li s0, 5 +; RV32I-NEXT: beqz t5, .LBB12_57 ; RV32I-NEXT: # %bb.56: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: j .LBB12_69 -; RV32I-NEXT: .LBB12_57: ; RV32I-NEXT: mv t6, t3 -; RV32I-NEXT: bne t1, s0, .LBB12_40 -; RV32I-NEXT: .LBB12_58: -; RV32I-NEXT: or a4, t4, t6 -; RV32I-NEXT: li t6, 0 -; RV32I-NEXT: beqz t5, .LBB12_41 +; RV32I-NEXT: .LBB12_57: +; RV32I-NEXT: beqz a7, .LBB12_69 +; RV32I-NEXT: # %bb.58: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: j .LBB12_70 ; RV32I-NEXT: .LBB12_59: -; RV32I-NEXT: mv t6, s11 -; RV32I-NEXT: bne t1, s4, .LBB12_42 +; RV32I-NEXT: mv s0, t3 +; RV32I-NEXT: bne a7, s1, .LBB12_41 ; RV32I-NEXT: .LBB12_60: -; RV32I-NEXT: or a4, ra, t6 -; RV32I-NEXT: li t6, 0 -; RV32I-NEXT: beqz t5, .LBB12_43 +; RV32I-NEXT: or t6, t4, s0 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t5, .LBB12_42 ; RV32I-NEXT: .LBB12_61: -; RV32I-NEXT: lw t6, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: li s4, 3 -; RV32I-NEXT: beq t1, s4, .LBB12_44 -; RV32I-NEXT: j .LBB12_45 +; RV32I-NEXT: mv s0, s11 +; RV32I-NEXT: bne a7, s4, .LBB12_43 ; RV32I-NEXT: .LBB12_62: -; RV32I-NEXT: mv t6, s7 -; RV32I-NEXT: bne t1, s4, .LBB12_47 +; RV32I-NEXT: or t6, ra, s0 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t5, .LBB12_44 ; RV32I-NEXT: .LBB12_63: -; RV32I-NEXT: or a4, s8, t6 -; 
RV32I-NEXT: li t6, 0 -; RV32I-NEXT: beqz t5, .LBB12_48 +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: li a5, 3 +; RV32I-NEXT: beq a7, a5, .LBB12_45 +; RV32I-NEXT: j .LBB12_46 ; RV32I-NEXT: .LBB12_64: -; RV32I-NEXT: mv t6, s5 -; RV32I-NEXT: bne t1, s9, .LBB12_49 +; RV32I-NEXT: mv s0, s7 +; RV32I-NEXT: bne a7, a5, .LBB12_48 ; RV32I-NEXT: .LBB12_65: -; RV32I-NEXT: or a4, s6, t6 -; RV32I-NEXT: mv t6, s1 -; RV32I-NEXT: beq t1, s2, .LBB12_50 +; RV32I-NEXT: or t6, s9, s0 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t5, .LBB12_49 ; RV32I-NEXT: .LBB12_66: -; RV32I-NEXT: mv t6, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s3, .LBB12_51 +; RV32I-NEXT: mv s0, s5 +; RV32I-NEXT: beq a7, s8, .LBB12_50 +; RV32I-NEXT: j .LBB12_51 ; RV32I-NEXT: .LBB12_67: -; RV32I-NEXT: mv a4, t6 -; RV32I-NEXT: bnez a7, .LBB12_52 -; RV32I-NEXT: j .LBB12_53 +; RV32I-NEXT: mv s0, t6 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: beq a7, s3, .LBB12_53 ; RV32I-NEXT: .LBB12_68: -; RV32I-NEXT: or a4, t4, a4 +; RV32I-NEXT: mv t6, s0 +; RV32I-NEXT: bnez a4, .LBB12_54 +; RV32I-NEXT: j .LBB12_55 ; RV32I-NEXT: .LBB12_69: -; RV32I-NEXT: li t4, 3 -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bnez t5, .LBB12_84 -; RV32I-NEXT: # %bb.70: -; RV32I-NEXT: beq t1, s0, .LBB12_85 -; RV32I-NEXT: .LBB12_71: -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bnez t5, .LBB12_86 +; RV32I-NEXT: or t3, t4, t6 +; RV32I-NEXT: .LBB12_70: +; RV32I-NEXT: li t6, 3 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bnez t5, .LBB12_85 +; RV32I-NEXT: # %bb.71: +; RV32I-NEXT: beq a7, s1, .LBB12_86 ; RV32I-NEXT: .LBB12_72: -; RV32I-NEXT: beq t1, t6, .LBB12_87 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bnez t5, .LBB12_87 ; RV32I-NEXT: .LBB12_73: -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bnez t5, .LBB12_88 +; RV32I-NEXT: beq a7, s4, .LBB12_88 ; RV32I-NEXT: .LBB12_74: -; RV32I-NEXT: beq t1, t4, .LBB12_89 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bnez t5, .LBB12_89 ; RV32I-NEXT: .LBB12_75: -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bnez t5, .LBB12_90 +; RV32I-NEXT: beq 
a7, t6, .LBB12_90 ; RV32I-NEXT: .LBB12_76: -; RV32I-NEXT: beq t1, s4, .LBB12_91 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bnez t5, .LBB12_91 ; RV32I-NEXT: .LBB12_77: -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: bne t1, s9, .LBB12_92 +; RV32I-NEXT: beq a7, a5, .LBB12_92 ; RV32I-NEXT: .LBB12_78: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s2, .LBB12_93 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: bne a7, s0, .LBB12_93 ; RV32I-NEXT: .LBB12_79: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s3, .LBB12_94 +; RV32I-NEXT: bne a7, s8, .LBB12_94 ; RV32I-NEXT: .LBB12_80: -; RV32I-NEXT: bnez a7, .LBB12_95 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s3, .LBB12_95 ; RV32I-NEXT: .LBB12_81: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bnez t5, .LBB12_96 +; RV32I-NEXT: bnez a4, .LBB12_96 ; RV32I-NEXT: .LBB12_82: -; RV32I-NEXT: beqz t1, .LBB12_97 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB12_97 ; RV32I-NEXT: .LBB12_83: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: j .LBB12_98 +; RV32I-NEXT: beqz a7, .LBB12_98 ; RV32I-NEXT: .LBB12_84: -; RV32I-NEXT: mv t3, s11 -; RV32I-NEXT: bne t1, s0, .LBB12_71 -; RV32I-NEXT: .LBB12_85: -; RV32I-NEXT: or a4, ra, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beqz t5, .LBB12_72 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bnez t5, .LBB12_99 +; RV32I-NEXT: j .LBB12_100 +; RV32I-NEXT: .LBB12_85: +; RV32I-NEXT: mv t4, s11 +; RV32I-NEXT: bne a7, s1, .LBB12_72 ; RV32I-NEXT: .LBB12_86: -; RV32I-NEXT: lw t3, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne t1, t6, .LBB12_73 +; RV32I-NEXT: or t3, ra, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beqz t5, .LBB12_73 ; RV32I-NEXT: .LBB12_87: -; RV32I-NEXT: or a4, s10, t3 -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beqz t5, .LBB12_74 +; RV32I-NEXT: lw t4, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne a7, s4, .LBB12_74 ; RV32I-NEXT: .LBB12_88: -; RV32I-NEXT: mv t3, s7 -; RV32I-NEXT: bne t1, t4, .LBB12_75 +; RV32I-NEXT: or t3, s10, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beqz t5, .LBB12_75 ; RV32I-NEXT: .LBB12_89: -; RV32I-NEXT: or a4, s8, 
t3 -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beqz t5, .LBB12_76 +; RV32I-NEXT: mv t4, s7 +; RV32I-NEXT: bne a7, t6, .LBB12_76 ; RV32I-NEXT: .LBB12_90: -; RV32I-NEXT: mv t3, s5 -; RV32I-NEXT: bne t1, s4, .LBB12_77 +; RV32I-NEXT: or t3, s9, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beqz t5, .LBB12_77 ; RV32I-NEXT: .LBB12_91: -; RV32I-NEXT: or a4, s6, t3 -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: beq t1, s9, .LBB12_78 +; RV32I-NEXT: mv t4, s5 +; RV32I-NEXT: bne a7, a5, .LBB12_78 ; RV32I-NEXT: .LBB12_92: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s2, .LBB12_79 +; RV32I-NEXT: or t3, s6, t4 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: beq a7, s0, .LBB12_79 ; RV32I-NEXT: .LBB12_93: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s3, .LBB12_80 +; RV32I-NEXT: beq a7, s8, .LBB12_80 ; RV32I-NEXT: .LBB12_94: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: beqz a7, .LBB12_81 +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s3, .LBB12_81 ; RV32I-NEXT: .LBB12_95: -; RV32I-NEXT: mv a3, t3 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beqz t5, .LBB12_82 +; RV32I-NEXT: mv t4, t3 +; RV32I-NEXT: beqz a4, .LBB12_82 ; RV32I-NEXT: .LBB12_96: -; RV32I-NEXT: mv a4, s11 -; RV32I-NEXT: bnez t1, .LBB12_83 +; RV32I-NEXT: mv a3, t4 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB12_83 ; RV32I-NEXT: .LBB12_97: -; RV32I-NEXT: or a4, ra, a4 +; RV32I-NEXT: mv t3, s11 +; RV32I-NEXT: bnez a7, .LBB12_84 ; RV32I-NEXT: .LBB12_98: -; RV32I-NEXT: lw ra, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bnez t5, .LBB12_112 -; RV32I-NEXT: # %bb.99: -; RV32I-NEXT: beq t1, s0, .LBB12_113 +; RV32I-NEXT: or t3, ra, t3 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beqz t5, .LBB12_100 +; RV32I-NEXT: .LBB12_99: +; RV32I-NEXT: lw t4, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB12_100: -; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq a7, s1, .LBB12_113 +; RV32I-NEXT: # %bb.101: +; RV32I-NEXT: li t4, 0 ; RV32I-NEXT: bnez t5, 
.LBB12_114 -; RV32I-NEXT: .LBB12_101: -; RV32I-NEXT: beq t1, t6, .LBB12_115 ; RV32I-NEXT: .LBB12_102: -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bnez t5, .LBB12_116 +; RV32I-NEXT: beq a7, s4, .LBB12_115 ; RV32I-NEXT: .LBB12_103: -; RV32I-NEXT: beq t1, t4, .LBB12_117 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bnez t5, .LBB12_116 ; RV32I-NEXT: .LBB12_104: -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: bne t1, s4, .LBB12_118 +; RV32I-NEXT: beq a7, t6, .LBB12_117 ; RV32I-NEXT: .LBB12_105: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s9, .LBB12_119 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: bne a7, a5, .LBB12_118 ; RV32I-NEXT: .LBB12_106: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s2, .LBB12_120 +; RV32I-NEXT: bne a7, s0, .LBB12_119 ; RV32I-NEXT: .LBB12_107: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s3, .LBB12_121 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s8, .LBB12_120 ; RV32I-NEXT: .LBB12_108: -; RV32I-NEXT: bnez a7, .LBB12_122 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne a7, s3, .LBB12_121 ; RV32I-NEXT: .LBB12_109: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bnez t5, .LBB12_123 +; RV32I-NEXT: bnez a4, .LBB12_122 ; RV32I-NEXT: .LBB12_110: -; RV32I-NEXT: beqz t1, .LBB12_124 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB12_123 ; RV32I-NEXT: .LBB12_111: -; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beqz a7, .LBB12_124 +; RV32I-NEXT: .LBB12_112: ; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: li t4, 0 ; RV32I-NEXT: bnez t5, .LBB12_125 ; RV32I-NEXT: j .LBB12_126 -; RV32I-NEXT: .LBB12_112: -; RV32I-NEXT: lw t3, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne t1, s0, .LBB12_100 ; RV32I-NEXT: .LBB12_113: -; RV32I-NEXT: or a4, s10, t3 -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beqz t5, .LBB12_101 +; RV32I-NEXT: or t3, s10, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beqz t5, .LBB12_102 ; RV32I-NEXT: .LBB12_114: -; RV32I-NEXT: mv t3, s7 -; RV32I-NEXT: bne t1, t6, .LBB12_102 +; RV32I-NEXT: mv t4, s7 +; RV32I-NEXT: bne a7, s4, .LBB12_103 ; RV32I-NEXT: .LBB12_115: -; RV32I-NEXT: or a4, s8, t3 -; RV32I-NEXT: 
li t3, 0 -; RV32I-NEXT: beqz t5, .LBB12_103 +; RV32I-NEXT: or t3, s9, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beqz t5, .LBB12_104 ; RV32I-NEXT: .LBB12_116: -; RV32I-NEXT: mv t3, s5 -; RV32I-NEXT: bne t1, t4, .LBB12_104 +; RV32I-NEXT: mv t4, s5 +; RV32I-NEXT: bne a7, t6, .LBB12_105 ; RV32I-NEXT: .LBB12_117: -; RV32I-NEXT: or a4, s6, t3 -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: beq t1, s4, .LBB12_105 +; RV32I-NEXT: or t3, s6, t4 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: beq a7, a5, .LBB12_106 ; RV32I-NEXT: .LBB12_118: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s9, .LBB12_106 -; RV32I-NEXT: .LBB12_119: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s2, .LBB12_107 +; RV32I-NEXT: beq a7, s0, .LBB12_107 +; RV32I-NEXT: .LBB12_119: +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s8, .LBB12_108 ; RV32I-NEXT: .LBB12_120: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s3, .LBB12_108 +; RV32I-NEXT: mv t4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq a7, s3, .LBB12_109 ; RV32I-NEXT: .LBB12_121: -; RV32I-NEXT: mv a4, t3 -; RV32I-NEXT: beqz a7, .LBB12_109 +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: beqz a4, .LBB12_110 ; RV32I-NEXT: .LBB12_122: -; RV32I-NEXT: mv a5, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beqz t5, .LBB12_110 +; RV32I-NEXT: mv a6, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB12_111 ; RV32I-NEXT: .LBB12_123: -; RV32I-NEXT: lw a4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: bnez t1, .LBB12_111 +; RV32I-NEXT: lw t3, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bnez a7, .LBB12_112 ; RV32I-NEXT: .LBB12_124: -; RV32I-NEXT: or a4, s10, a4 -; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: or t3, s10, t3 +; RV32I-NEXT: li t4, 0 ; RV32I-NEXT: beqz t5, .LBB12_126 ; RV32I-NEXT: .LBB12_125: -; RV32I-NEXT: mv t3, s7 +; RV32I-NEXT: mv t4, s7 ; RV32I-NEXT: .LBB12_126: -; RV32I-NEXT: beq t1, s0, .LBB12_138 +; RV32I-NEXT: beq a7, s1, .LBB12_138 ; RV32I-NEXT: # 
%bb.127: -; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: li t4, 0 ; RV32I-NEXT: bnez t5, .LBB12_139 ; RV32I-NEXT: .LBB12_128: -; RV32I-NEXT: beq t1, t6, .LBB12_140 +; RV32I-NEXT: beq a7, s4, .LBB12_140 ; RV32I-NEXT: .LBB12_129: -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: bne t1, t4, .LBB12_141 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: bne a7, t6, .LBB12_141 ; RV32I-NEXT: .LBB12_130: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s4, .LBB12_142 -; RV32I-NEXT: .LBB12_131: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s9, .LBB12_143 +; RV32I-NEXT: bne a7, a5, .LBB12_142 +; RV32I-NEXT: .LBB12_131: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s0, .LBB12_143 ; RV32I-NEXT: .LBB12_132: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s2, .LBB12_144 -; RV32I-NEXT: .LBB12_133: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s3, .LBB12_145 +; RV32I-NEXT: bne a7, s8, .LBB12_144 +; RV32I-NEXT: .LBB12_133: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s3, .LBB12_145 ; RV32I-NEXT: .LBB12_134: -; RV32I-NEXT: bnez a7, .LBB12_146 +; RV32I-NEXT: bnez a4, .LBB12_146 ; RV32I-NEXT: .LBB12_135: -; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li t3, 0 ; RV32I-NEXT: bnez t5, .LBB12_147 ; RV32I-NEXT: .LBB12_136: -; RV32I-NEXT: beqz t1, .LBB12_148 +; RV32I-NEXT: beqz a7, .LBB12_148 ; RV32I-NEXT: .LBB12_137: -; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: li t4, 0 ; RV32I-NEXT: bnez t5, .LBB12_149 ; RV32I-NEXT: j .LBB12_150 ; RV32I-NEXT: .LBB12_138: -; RV32I-NEXT: or a4, s8, t3 -; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: or t3, s9, t4 +; RV32I-NEXT: li t4, 0 ; RV32I-NEXT: beqz t5, .LBB12_128 ; RV32I-NEXT: .LBB12_139: -; RV32I-NEXT: mv t3, s5 -; RV32I-NEXT: bne t1, t6, .LBB12_129 +; RV32I-NEXT: mv t4, s5 +; RV32I-NEXT: bne a7, s4, .LBB12_129 ; RV32I-NEXT: .LBB12_140: -; RV32I-NEXT: or a4, s6, t3 -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: beq t1, t4, .LBB12_130 +; RV32I-NEXT: or t3, s6, t4 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: beq a7, t6, .LBB12_130 ; RV32I-NEXT: .LBB12_141: -; RV32I-NEXT: mv t3, a4 -; 
RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s4, .LBB12_131 -; RV32I-NEXT: .LBB12_142: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s9, .LBB12_132 +; RV32I-NEXT: beq a7, a5, .LBB12_131 +; RV32I-NEXT: .LBB12_142: +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s0, .LBB12_132 ; RV32I-NEXT: .LBB12_143: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s2, .LBB12_133 -; RV32I-NEXT: .LBB12_144: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s3, .LBB12_134 +; RV32I-NEXT: beq a7, s8, .LBB12_133 +; RV32I-NEXT: .LBB12_144: +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s3, .LBB12_134 ; RV32I-NEXT: .LBB12_145: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: beqz a7, .LBB12_135 +; RV32I-NEXT: mv t4, t3 +; RV32I-NEXT: beqz a4, .LBB12_135 ; RV32I-NEXT: .LBB12_146: -; RV32I-NEXT: mv a6, t3 -; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: mv t0, t4 +; RV32I-NEXT: li t3, 0 ; RV32I-NEXT: beqz t5, .LBB12_136 ; RV32I-NEXT: .LBB12_147: -; RV32I-NEXT: mv a4, s7 -; RV32I-NEXT: bnez t1, .LBB12_137 +; RV32I-NEXT: mv t3, s7 +; RV32I-NEXT: bnez a7, .LBB12_137 ; RV32I-NEXT: .LBB12_148: -; RV32I-NEXT: or a4, s8, a4 -; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: or t3, s9, t3 +; RV32I-NEXT: li t4, 0 ; RV32I-NEXT: beqz t5, .LBB12_150 ; RV32I-NEXT: .LBB12_149: -; RV32I-NEXT: mv t3, s5 +; RV32I-NEXT: mv t4, s5 ; RV32I-NEXT: .LBB12_150: -; RV32I-NEXT: beq t1, s0, .LBB12_161 +; RV32I-NEXT: beq a7, s1, .LBB12_161 ; RV32I-NEXT: # %bb.151: -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: bne t1, t6, .LBB12_162 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: bne a7, s4, .LBB12_162 ; RV32I-NEXT: .LBB12_152: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, t4, .LBB12_163 -; RV32I-NEXT: .LBB12_153: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s4, .LBB12_164 +; RV32I-NEXT: bne a7, t6, .LBB12_163 +; RV32I-NEXT: .LBB12_153: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, a5, .LBB12_164 
; RV32I-NEXT: .LBB12_154: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s9, .LBB12_165 -; RV32I-NEXT: .LBB12_155: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s2, .LBB12_166 +; RV32I-NEXT: bne a7, s0, .LBB12_165 +; RV32I-NEXT: .LBB12_155: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s8, .LBB12_166 ; RV32I-NEXT: .LBB12_156: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s3, .LBB12_167 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne a7, s3, .LBB12_167 ; RV32I-NEXT: .LBB12_157: -; RV32I-NEXT: bnez a7, .LBB12_168 +; RV32I-NEXT: bnez a4, .LBB12_168 ; RV32I-NEXT: .LBB12_158: -; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li t3, 0 ; RV32I-NEXT: bnez t5, .LBB12_169 ; RV32I-NEXT: .LBB12_159: -; RV32I-NEXT: beqz t1, .LBB12_170 +; RV32I-NEXT: beqz a7, .LBB12_170 ; RV32I-NEXT: .LBB12_160: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: bne t1, s0, .LBB12_171 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: bne a7, s1, .LBB12_171 ; RV32I-NEXT: j .LBB12_172 ; RV32I-NEXT: .LBB12_161: -; RV32I-NEXT: or a4, s6, t3 -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: beq t1, t6, .LBB12_152 +; RV32I-NEXT: or t3, s6, t4 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: beq a7, s4, .LBB12_152 ; RV32I-NEXT: .LBB12_162: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, t4, .LBB12_153 -; RV32I-NEXT: .LBB12_163: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s4, .LBB12_154 +; RV32I-NEXT: beq a7, t6, .LBB12_153 +; RV32I-NEXT: .LBB12_163: +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, a5, .LBB12_154 ; RV32I-NEXT: .LBB12_164: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s9, .LBB12_155 -; RV32I-NEXT: .LBB12_165: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s2, .LBB12_156 +; RV32I-NEXT: beq a7, s0, .LBB12_155 +; RV32I-NEXT: .LBB12_165: +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s8, .LBB12_156 ; 
RV32I-NEXT: .LBB12_166: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s3, .LBB12_157 +; RV32I-NEXT: mv t4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq a7, s3, .LBB12_157 ; RV32I-NEXT: .LBB12_167: -; RV32I-NEXT: mv a4, t3 -; RV32I-NEXT: beqz a7, .LBB12_158 +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: beqz a4, .LBB12_158 ; RV32I-NEXT: .LBB12_168: -; RV32I-NEXT: mv t0, a4 -; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: mv t1, t3 +; RV32I-NEXT: li t3, 0 ; RV32I-NEXT: beqz t5, .LBB12_159 ; RV32I-NEXT: .LBB12_169: -; RV32I-NEXT: mv a4, s5 -; RV32I-NEXT: bnez t1, .LBB12_160 +; RV32I-NEXT: mv t3, s5 +; RV32I-NEXT: bnez a7, .LBB12_160 ; RV32I-NEXT: .LBB12_170: -; RV32I-NEXT: or a4, s6, a4 -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: beq t1, s0, .LBB12_172 +; RV32I-NEXT: or t3, s6, t3 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: beq a7, s1, .LBB12_172 ; RV32I-NEXT: .LBB12_171: -; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: .LBB12_172: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, t6, .LBB12_190 -; RV32I-NEXT: # %bb.173: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, t4, .LBB12_191 +; RV32I-NEXT: bne a7, s4, .LBB12_192 +; RV32I-NEXT: # %bb.173: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, t6, .LBB12_193 ; RV32I-NEXT: .LBB12_174: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s4, .LBB12_192 -; RV32I-NEXT: .LBB12_175: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s9, .LBB12_193 +; RV32I-NEXT: bne a7, a5, .LBB12_194 +; RV32I-NEXT: .LBB12_175: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s0, .LBB12_195 ; RV32I-NEXT: .LBB12_176: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s2, .LBB12_194 -; RV32I-NEXT: .LBB12_177: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s3, .LBB12_195 +; RV32I-NEXT: bne a7, s8, .LBB12_196 +; RV32I-NEXT: .LBB12_177: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s3, .LBB12_197 ; RV32I-NEXT: .LBB12_178: -; RV32I-NEXT: bnez a7, .LBB12_196 +; RV32I-NEXT: bnez a4, .LBB12_198 ; RV32I-NEXT: .LBB12_179: -; RV32I-NEXT: bnez t1, .LBB12_197 
+; RV32I-NEXT: bnez a7, .LBB12_199 ; RV32I-NEXT: .LBB12_180: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s0, .LBB12_198 -; RV32I-NEXT: .LBB12_181: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, t6, .LBB12_199 +; RV32I-NEXT: bne a7, s1, .LBB12_200 +; RV32I-NEXT: .LBB12_181: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s4, .LBB12_183 ; RV32I-NEXT: .LBB12_182: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, t4, .LBB12_200 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: .LBB12_183: +; RV32I-NEXT: li t5, 4 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s4, .LBB12_201 -; RV32I-NEXT: .LBB12_184: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s9, .LBB12_202 +; RV32I-NEXT: beq a7, t6, .LBB12_185 +; RV32I-NEXT: # %bb.184: +; RV32I-NEXT: mv t3, t4 ; RV32I-NEXT: .LBB12_185: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: lw a5, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne a7, t5, .LBB12_201 +; RV32I-NEXT: # %bb.186: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s2, .LBB12_203 -; RV32I-NEXT: .LBB12_186: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s3, .LBB12_204 +; RV32I-NEXT: bne a7, s0, .LBB12_202 ; RV32I-NEXT: .LBB12_187: -; RV32I-NEXT: beqz a7, .LBB12_189 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s8, .LBB12_203 ; RV32I-NEXT: .LBB12_188: -; RV32I-NEXT: mv t2, a4 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne a7, s3, .LBB12_204 ; RV32I-NEXT: .LBB12_189: -; RV32I-NEXT: srli a4, ra, 16 -; RV32I-NEXT: lui t4, 16 -; RV32I-NEXT: srli t3, ra, 24 -; RV32I-NEXT: srli a7, a1, 16 -; RV32I-NEXT: srli t6, a1, 24 -; RV32I-NEXT: srli t1, a3, 16 -; RV32I-NEXT: srli s2, a3, 24 -; RV32I-NEXT: srli t5, a5, 16 -; RV32I-NEXT: srli s3, a5, 24 -; RV32I-NEXT: srli s1, a6, 16 -; RV32I-NEXT: srli s6, a6, 24 -; RV32I-NEXT: srli s0, t0, 16 -; RV32I-NEXT: srli s5, t0, 24 -; RV32I-NEXT: srli s4, a0, 16 -; RV32I-NEXT: srli s7, a0, 24 -; RV32I-NEXT: srli s8, t2, 16 -; RV32I-NEXT: srli s9, t2, 24 -; RV32I-NEXT: addi t4, t4, -1 -; RV32I-NEXT: and s10, ra, t4 -; RV32I-NEXT: and s11, a1, t4 -; RV32I-NEXT: srli s10, s10, 
8 -; RV32I-NEXT: sb ra, 0(a2) -; RV32I-NEXT: sb s10, 1(a2) -; RV32I-NEXT: sb a4, 2(a2) -; RV32I-NEXT: sb t3, 3(a2) -; RV32I-NEXT: and a4, a3, t4 -; RV32I-NEXT: srli t3, s11, 8 +; RV32I-NEXT: beqz a4, .LBB12_191 +; RV32I-NEXT: .LBB12_190: +; RV32I-NEXT: mv t2, t3 +; RV32I-NEXT: .LBB12_191: +; RV32I-NEXT: lui a4, 16 +; RV32I-NEXT: addi a4, a4, -1 +; RV32I-NEXT: srli a7, a5, 16 +; RV32I-NEXT: and t3, a5, a4 +; RV32I-NEXT: srli t4, a5, 24 +; RV32I-NEXT: srli t3, t3, 8 +; RV32I-NEXT: sb a5, 0(a2) +; RV32I-NEXT: sb t3, 1(a2) +; RV32I-NEXT: sb a7, 2(a2) +; RV32I-NEXT: sb t4, 3(a2) +; RV32I-NEXT: srli a5, a1, 16 +; RV32I-NEXT: and a7, a1, a4 +; RV32I-NEXT: srli a7, a7, 8 +; RV32I-NEXT: srli t3, a1, 24 ; RV32I-NEXT: sb a1, 4(a2) -; RV32I-NEXT: sb t3, 5(a2) -; RV32I-NEXT: sb a7, 6(a2) -; RV32I-NEXT: sb t6, 7(a2) -; RV32I-NEXT: and a1, a5, t4 -; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: sb a7, 5(a2) +; RV32I-NEXT: sb a5, 6(a2) +; RV32I-NEXT: sb t3, 7(a2) +; RV32I-NEXT: srli a1, a3, 16 +; RV32I-NEXT: and a5, a3, a4 +; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: srli a7, a3, 24 ; RV32I-NEXT: sb a3, 8(a2) -; RV32I-NEXT: sb a4, 9(a2) -; RV32I-NEXT: sb t1, 10(a2) -; RV32I-NEXT: sb s2, 11(a2) -; RV32I-NEXT: and a3, a6, t4 -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a5, 12(a2) -; RV32I-NEXT: sb a1, 13(a2) -; RV32I-NEXT: sb t5, 14(a2) -; RV32I-NEXT: sb s3, 15(a2) -; RV32I-NEXT: and a1, t0, t4 +; RV32I-NEXT: sb a5, 9(a2) +; RV32I-NEXT: sb a1, 10(a2) +; RV32I-NEXT: sb a7, 11(a2) +; RV32I-NEXT: srli a1, a6, 16 +; RV32I-NEXT: and a3, a6, a4 ; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a6, 16(a2) +; RV32I-NEXT: srli a5, a6, 24 +; RV32I-NEXT: sb a6, 12(a2) +; RV32I-NEXT: sb a3, 13(a2) +; RV32I-NEXT: sb a1, 14(a2) +; RV32I-NEXT: sb a5, 15(a2) +; RV32I-NEXT: srli a1, t0, 16 +; RV32I-NEXT: and a3, t0, a4 +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: srli a5, t0, 24 +; RV32I-NEXT: sb t0, 16(a2) ; RV32I-NEXT: sb a3, 17(a2) -; RV32I-NEXT: sb s1, 18(a2) -; RV32I-NEXT: sb s6, 19(a2) -; 
RV32I-NEXT: and a3, a0, t4 -; RV32I-NEXT: and a4, t2, t4 -; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a1, 18(a2) +; RV32I-NEXT: sb a5, 19(a2) +; RV32I-NEXT: srli a1, t1, 16 +; RV32I-NEXT: and a3, t1, a4 ; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb t0, 20(a2) -; RV32I-NEXT: sb a1, 21(a2) -; RV32I-NEXT: sb s0, 22(a2) -; RV32I-NEXT: sb s5, 23(a2) +; RV32I-NEXT: srli a5, t1, 24 +; RV32I-NEXT: sb t1, 20(a2) +; RV32I-NEXT: sb a3, 21(a2) +; RV32I-NEXT: sb a1, 22(a2) +; RV32I-NEXT: sb a5, 23(a2) +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: and a3, a0, a4 +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: srli a5, a0, 24 ; RV32I-NEXT: sb a0, 24(a2) ; RV32I-NEXT: sb a3, 25(a2) -; RV32I-NEXT: sb s4, 26(a2) -; RV32I-NEXT: sb s7, 27(a2) +; RV32I-NEXT: sb a1, 26(a2) +; RV32I-NEXT: sb a5, 27(a2) +; RV32I-NEXT: srli a0, t2, 16 +; RV32I-NEXT: and a1, t2, a4 +; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: srli a3, t2, 24 ; RV32I-NEXT: sb t2, 28(a2) -; RV32I-NEXT: sb a4, 29(a2) -; RV32I-NEXT: sb s8, 30(a2) -; RV32I-NEXT: sb s9, 31(a2) +; RV32I-NEXT: sb a1, 29(a2) +; RV32I-NEXT: sb a0, 30(a2) +; RV32I-NEXT: sb a3, 31(a2) ; RV32I-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 56(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 52(sp) # 4-byte Folded Reload @@ -3758,64 +3725,57 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: lw s11, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB12_190: -; RV32I-NEXT: mv a4, t3 -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, t4, .LBB12_174 -; RV32I-NEXT: .LBB12_191: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s4, .LBB12_175 ; RV32I-NEXT: .LBB12_192: -; RV32I-NEXT: mv a4, t3 -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s9, .LBB12_176 +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, t6, .LBB12_174 ; RV32I-NEXT: .LBB12_193: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li 
a4, 0 -; RV32I-NEXT: beq t1, s2, .LBB12_177 -; RV32I-NEXT: .LBB12_194: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s3, .LBB12_178 +; RV32I-NEXT: beq a7, a5, .LBB12_175 +; RV32I-NEXT: .LBB12_194: +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s0, .LBB12_176 ; RV32I-NEXT: .LBB12_195: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: beqz a7, .LBB12_179 +; RV32I-NEXT: mv t4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq a7, s8, .LBB12_177 ; RV32I-NEXT: .LBB12_196: -; RV32I-NEXT: mv a0, t3 -; RV32I-NEXT: beqz t1, .LBB12_180 +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s3, .LBB12_178 ; RV32I-NEXT: .LBB12_197: -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s0, .LBB12_181 +; RV32I-NEXT: mv t4, t3 +; RV32I-NEXT: beqz a4, .LBB12_179 ; RV32I-NEXT: .LBB12_198: -; RV32I-NEXT: mv a4, s1 -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, t6, .LBB12_182 +; RV32I-NEXT: mv a0, t4 +; RV32I-NEXT: beqz a7, .LBB12_180 ; RV32I-NEXT: .LBB12_199: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, t4, .LBB12_183 -; RV32I-NEXT: .LBB12_200: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li s2, 0 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s4, .LBB12_184 +; RV32I-NEXT: beq a7, s1, .LBB12_181 +; RV32I-NEXT: .LBB12_200: +; RV32I-NEXT: mv t3, s2 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s4, .LBB12_182 +; RV32I-NEXT: j .LBB12_183 ; RV32I-NEXT: .LBB12_201: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s9, .LBB12_185 -; RV32I-NEXT: .LBB12_202: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s2, .LBB12_186 +; RV32I-NEXT: beq a7, s0, .LBB12_187 +; RV32I-NEXT: .LBB12_202: +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s8, .LBB12_188 ; RV32I-NEXT: .LBB12_203: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s3, .LBB12_187 +; RV32I-NEXT: mv t4, t3 +; 
RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq a7, s3, .LBB12_189 ; RV32I-NEXT: .LBB12_204: -; RV32I-NEXT: mv a4, t3 -; RV32I-NEXT: bnez a7, .LBB12_188 -; RV32I-NEXT: j .LBB12_189 +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: bnez a4, .LBB12_190 +; RV32I-NEXT: j .LBB12_191 %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 %bitOff = shl i256 %byteOff, 3 @@ -3827,311 +3787,297 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: lshr_32bytes_wordOff: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -96 -; RV64I-NEXT: sd s0, 88(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 80(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 72(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 64(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s5, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s6, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s7, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s8, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s9, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s10, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s11, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: lbu a4, 1(a0) -; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: lbu a6, 3(a0) -; RV64I-NEXT: lbu a7, 4(a0) +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd s0, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a5, 3(a0) +; RV64I-NEXT: lbu a6, 2(a0) +; RV64I-NEXT: lbu a7, 7(a0) ; RV64I-NEXT: lbu t0, 5(a0) ; RV64I-NEXT: lbu t1, 6(a0) -; RV64I-NEXT: lbu t2, 7(a0) -; RV64I-NEXT: lbu t3, 8(a0) -; RV64I-NEXT: lbu t4, 9(a0) -; 
RV64I-NEXT: lbu t5, 10(a0) -; RV64I-NEXT: lbu t6, 11(a0) -; RV64I-NEXT: lbu s0, 12(a0) -; RV64I-NEXT: lbu s1, 13(a0) -; RV64I-NEXT: lbu s2, 14(a0) -; RV64I-NEXT: lbu s3, 15(a0) -; RV64I-NEXT: lbu s4, 16(a0) -; RV64I-NEXT: lbu s5, 17(a0) -; RV64I-NEXT: lbu s6, 18(a0) -; RV64I-NEXT: lbu s7, 19(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: lbu t2, 4(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a3, a4 +; RV64I-NEXT: or a6, a5, a6 +; RV64I-NEXT: slli a7, a7, 8 ; RV64I-NEXT: slli t0, t0, 8 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: lbu s8, 20(a0) -; RV64I-NEXT: lbu s9, 21(a0) -; RV64I-NEXT: lbu s10, 22(a0) -; RV64I-NEXT: lbu s11, 23(a0) -; RV64I-NEXT: slli t2, t2, 8 +; RV64I-NEXT: or a3, a7, t1 +; RV64I-NEXT: lbu a7, 8(a0) +; RV64I-NEXT: lbu t1, 9(a0) +; RV64I-NEXT: lbu t3, 10(a0) +; RV64I-NEXT: lbu t4, 11(a0) +; RV64I-NEXT: lbu t5, 12(a0) +; RV64I-NEXT: lbu t6, 13(a0) +; RV64I-NEXT: lbu s0, 14(a0) +; RV64I-NEXT: lbu s1, 15(a0) +; RV64I-NEXT: or a5, t0, t2 +; RV64I-NEXT: slli a3, a3, 16 +; RV64I-NEXT: or a5, a3, a5 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or a7, t1, a7 ; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: slli t6, t6, 8 +; RV64I-NEXT: or t0, t4, t3 ; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or t1, t2, t1 -; RV64I-NEXT: or a4, t4, t3 -; RV64I-NEXT: or a6, t6, t5 -; RV64I-NEXT: or t0, s1, s0 -; RV64I-NEXT: lbu t5, 24(a0) -; RV64I-NEXT: lbu t6, 25(a0) -; RV64I-NEXT: lbu s0, 26(a0) -; RV64I-NEXT: lbu s1, 27(a0) -; RV64I-NEXT: slli s3, s3, 8 -; RV64I-NEXT: slli s5, s5, 8 -; RV64I-NEXT: slli s7, s7, 8 -; RV64I-NEXT: or t4, s3, s2 -; RV64I-NEXT: or t2, s5, s4 -; RV64I-NEXT: or t3, s7, s6 -; RV64I-NEXT: lbu s2, 28(a0) -; RV64I-NEXT: lbu s3, 29(a0) -; RV64I-NEXT: lbu s4, 30(a0) -; RV64I-NEXT: lbu a0, 31(a0) -; RV64I-NEXT: slli s9, s9, 8 -; RV64I-NEXT: slli s11, s11, 8 ; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: slli s1, s1, 8 -; 
RV64I-NEXT: or s5, s9, s8 -; RV64I-NEXT: or s6, s11, s10 -; RV64I-NEXT: or t5, t6, t5 ; RV64I-NEXT: or s0, s1, s0 -; RV64I-NEXT: lbu t6, 0(a1) -; RV64I-NEXT: lbu s1, 1(a1) -; RV64I-NEXT: lbu s7, 2(a1) -; RV64I-NEXT: lbu s8, 3(a1) -; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: lbu a3, 16(a0) +; RV64I-NEXT: lbu t1, 17(a0) +; RV64I-NEXT: lbu t3, 18(a0) +; RV64I-NEXT: lbu t4, 19(a0) +; RV64I-NEXT: lbu s1, 20(a0) +; RV64I-NEXT: lbu s2, 21(a0) +; RV64I-NEXT: lbu s3, 22(a0) +; RV64I-NEXT: lbu s4, 23(a0) +; RV64I-NEXT: or t2, t6, t5 +; RV64I-NEXT: slli s0, s0, 16 +; RV64I-NEXT: or t2, s0, t2 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or a3, t1, a3 +; RV64I-NEXT: slli t4, t4, 8 +; RV64I-NEXT: or t1, t4, t3 +; RV64I-NEXT: slli s4, s4, 8 +; RV64I-NEXT: slli s2, s2, 8 +; RV64I-NEXT: or t3, s4, s3 +; RV64I-NEXT: or t4, s2, s1 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: or t3, t3, t4 +; RV64I-NEXT: lbu t4, 24(a0) +; RV64I-NEXT: lbu t5, 25(a0) +; RV64I-NEXT: lbu t6, 26(a0) +; RV64I-NEXT: lbu s0, 27(a0) +; RV64I-NEXT: or a3, t1, a3 +; RV64I-NEXT: slli t3, t3, 32 +; RV64I-NEXT: lbu t1, 28(a0) +; RV64I-NEXT: lbu s1, 29(a0) +; RV64I-NEXT: lbu s2, 30(a0) +; RV64I-NEXT: lbu a0, 31(a0) +; RV64I-NEXT: or a3, t3, a3 +; RV64I-NEXT: slli s0, s0, 8 +; RV64I-NEXT: slli t5, t5, 8 +; RV64I-NEXT: or t3, s0, t6 +; RV64I-NEXT: or t4, t5, t4 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: or t3, t3, t4 ; RV64I-NEXT: slli a0, a0, 8 ; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s2, s3, s2 -; RV64I-NEXT: or s3, a0, s4 -; RV64I-NEXT: or t6, s1, t6 -; RV64I-NEXT: lbu a0, 4(a1) -; RV64I-NEXT: lbu s1, 5(a1) -; RV64I-NEXT: lbu s4, 6(a1) +; RV64I-NEXT: or a0, a0, s2 +; RV64I-NEXT: or t1, s1, t1 +; RV64I-NEXT: slli a0, a0, 16 +; RV64I-NEXT: lbu t4, 0(a1) +; RV64I-NEXT: lbu t5, 1(a1) +; RV64I-NEXT: lbu t6, 2(a1) +; RV64I-NEXT: lbu s0, 3(a1) +; RV64I-NEXT: lbu s1, 4(a1) +; RV64I-NEXT: lbu s2, 5(a1) +; RV64I-NEXT: lbu s3, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli s8, 
s8, 8 -; RV64I-NEXT: or s7, s8, s7 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s1, s1, a0 +; RV64I-NEXT: or t1, a0, t1 +; RV64I-NEXT: slli t5, t5, 8 +; RV64I-NEXT: or t4, t5, t4 +; RV64I-NEXT: slli s0, s0, 8 +; RV64I-NEXT: or t5, s0, t6 +; RV64I-NEXT: slli s2, s2, 8 +; RV64I-NEXT: or t6, s2, s1 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or s4, a1, s4 -; RV64I-NEXT: slli t1, t1, 16 -; RV64I-NEXT: or a1, t1, a7 -; RV64I-NEXT: slli t4, t4, 16 -; RV64I-NEXT: or a0, t4, t0 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: or t0, t3, t2 -; RV64I-NEXT: slli s6, s6, 16 -; RV64I-NEXT: or t1, s6, s5 +; RV64I-NEXT: or s0, a1, s3 +; RV64I-NEXT: slli a1, a6, 16 +; RV64I-NEXT: slli a0, t0, 16 ; RV64I-NEXT: slli s0, s0, 16 -; RV64I-NEXT: or t3, s0, t5 -; RV64I-NEXT: slli s3, s3, 16 -; RV64I-NEXT: or t5, s3, s2 -; RV64I-NEXT: slli s7, s7, 16 -; RV64I-NEXT: or t6, s7, t6 -; RV64I-NEXT: slli s4, s4, 16 -; RV64I-NEXT: or s0, s4, s1 -; RV64I-NEXT: li a7, 64 -; RV64I-NEXT: slli t4, a5, 16 -; RV64I-NEXT: slli t2, a6, 16 +; RV64I-NEXT: slli t5, t5, 16 +; RV64I-NEXT: or a6, s0, t6 +; RV64I-NEXT: or t0, t5, t4 +; RV64I-NEXT: slli a6, a6, 32 ; RV64I-NEXT: slli t1, t1, 32 -; RV64I-NEXT: slli t5, t5, 32 -; RV64I-NEXT: slli s0, s0, 32 -; RV64I-NEXT: or a6, t1, t0 -; RV64I-NEXT: or t0, t5, t3 -; RV64I-NEXT: or a5, s0, t6 -; RV64I-NEXT: slli a5, a5, 5 -; RV64I-NEXT: sub t1, a5, a7 -; RV64I-NEXT: neg t5, a5 -; RV64I-NEXT: sll t3, t0, t5 -; RV64I-NEXT: bltu a5, a7, .LBB13_2 +; RV64I-NEXT: or a6, a6, t0 +; RV64I-NEXT: or t0, t1, t3 +; RV64I-NEXT: slli a6, a6, 5 +; RV64I-NEXT: li t1, 64 +; RV64I-NEXT: neg t3, a6 +; RV64I-NEXT: sub t5, a6, t1 +; RV64I-NEXT: sll t4, t0, t3 +; RV64I-NEXT: bltu a6, t1, .LBB13_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: srl t6, t0, t1 +; RV64I-NEXT: srl t6, t0, t5 ; RV64I-NEXT: j .LBB13_3 ; RV64I-NEXT: .LBB13_2: -; RV64I-NEXT: srl t6, a6, a5 -; RV64I-NEXT: or t6, t6, t3 +; RV64I-NEXT: srl t6, a3, a6 +; RV64I-NEXT: or t6, t6, t4 ; RV64I-NEXT: .LBB13_3: -; RV64I-NEXT: or a3, 
t4, a3 -; RV64I-NEXT: slli t4, a1, 32 -; RV64I-NEXT: or t2, t2, a4 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: mv a1, a6 -; RV64I-NEXT: beqz a5, .LBB13_5 +; RV64I-NEXT: or a4, a1, a4 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: slli t2, t2, 32 +; RV64I-NEXT: mv a1, a3 +; RV64I-NEXT: beqz a6, .LBB13_5 ; RV64I-NEXT: # %bb.4: ; RV64I-NEXT: mv a1, t6 ; RV64I-NEXT: .LBB13_5: -; RV64I-NEXT: or a4, t4, a3 -; RV64I-NEXT: or a3, a0, t2 -; RV64I-NEXT: bltu a5, a7, .LBB13_7 +; RV64I-NEXT: or a5, a5, a4 +; RV64I-NEXT: or a4, t2, a0 +; RV64I-NEXT: bltu a6, t1, .LBB13_7 ; RV64I-NEXT: # %bb.6: ; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: srl t4, a3, t1 +; RV64I-NEXT: srl t5, a4, t5 ; RV64I-NEXT: j .LBB13_8 ; RV64I-NEXT: .LBB13_7: -; RV64I-NEXT: srl a0, t0, a5 -; RV64I-NEXT: srl t1, a4, a5 -; RV64I-NEXT: sll t2, a3, t5 -; RV64I-NEXT: or t4, t1, t2 +; RV64I-NEXT: srl a7, a5, a6 +; RV64I-NEXT: sll t2, a4, t3 +; RV64I-NEXT: srl a0, t0, a6 +; RV64I-NEXT: or t5, a7, t2 ; RV64I-NEXT: .LBB13_8: -; RV64I-NEXT: li t1, 128 -; RV64I-NEXT: mv t2, a4 -; RV64I-NEXT: beqz a5, .LBB13_10 +; RV64I-NEXT: li a7, 128 +; RV64I-NEXT: mv t2, a5 +; RV64I-NEXT: beqz a6, .LBB13_10 ; RV64I-NEXT: # %bb.9: -; RV64I-NEXT: mv t2, t4 +; RV64I-NEXT: mv t2, t5 ; RV64I-NEXT: .LBB13_10: -; RV64I-NEXT: sub t6, t1, a5 -; RV64I-NEXT: bltu a5, a7, .LBB13_13 +; RV64I-NEXT: sub t6, a7, a6 +; RV64I-NEXT: bltu a6, t1, .LBB13_13 ; RV64I-NEXT: # %bb.11: -; RV64I-NEXT: li t4, 0 -; RV64I-NEXT: bgeu t6, a7, .LBB13_14 +; RV64I-NEXT: li t5, 0 +; RV64I-NEXT: bgeu t6, t1, .LBB13_14 ; RV64I-NEXT: .LBB13_12: -; RV64I-NEXT: sll t5, a6, t5 ; RV64I-NEXT: neg s0, t6 -; RV64I-NEXT: srl s0, a6, s0 -; RV64I-NEXT: or s1, s0, t3 +; RV64I-NEXT: srl s0, a3, s0 +; RV64I-NEXT: sll t3, a3, t3 +; RV64I-NEXT: or s1, s0, t4 ; RV64I-NEXT: j .LBB13_15 ; RV64I-NEXT: .LBB13_13: -; RV64I-NEXT: srl t4, a3, a5 -; RV64I-NEXT: bltu t6, a7, .LBB13_12 +; RV64I-NEXT: srl t5, a4, a6 +; RV64I-NEXT: bltu t6, t1, .LBB13_12 ; RV64I-NEXT: 
.LBB13_14: -; RV64I-NEXT: li t5, 0 -; RV64I-NEXT: sub t3, t6, a7 -; RV64I-NEXT: sll s1, a6, t3 +; RV64I-NEXT: li t3, 0 +; RV64I-NEXT: sub t4, t6, t1 +; RV64I-NEXT: sll s1, a3, t4 ; RV64I-NEXT: .LBB13_15: -; RV64I-NEXT: sub s0, a5, t1 -; RV64I-NEXT: mv t3, t0 +; RV64I-NEXT: sub s0, a6, a7 +; RV64I-NEXT: mv t4, t0 ; RV64I-NEXT: beqz t6, .LBB13_17 ; RV64I-NEXT: # %bb.16: -; RV64I-NEXT: mv t3, s1 +; RV64I-NEXT: mv t4, s1 ; RV64I-NEXT: .LBB13_17: -; RV64I-NEXT: bltu s0, a7, .LBB13_19 +; RV64I-NEXT: bltu s0, t1, .LBB13_19 ; RV64I-NEXT: # %bb.18: -; RV64I-NEXT: sub t6, s0, a7 +; RV64I-NEXT: sub t6, s0, t1 ; RV64I-NEXT: srl t6, t0, t6 ; RV64I-NEXT: bnez s0, .LBB13_20 ; RV64I-NEXT: j .LBB13_21 ; RV64I-NEXT: .LBB13_19: -; RV64I-NEXT: srl t6, a6, s0 -; RV64I-NEXT: neg s1, s0 -; RV64I-NEXT: sll s1, t0, s1 -; RV64I-NEXT: or t6, t6, s1 +; RV64I-NEXT: neg t6, s0 +; RV64I-NEXT: srl s1, a3, s0 +; RV64I-NEXT: sll t6, t0, t6 +; RV64I-NEXT: or t6, s1, t6 ; RV64I-NEXT: beqz s0, .LBB13_21 ; RV64I-NEXT: .LBB13_20: -; RV64I-NEXT: mv a6, t6 +; RV64I-NEXT: mv a3, t6 ; RV64I-NEXT: .LBB13_21: -; RV64I-NEXT: bltu s0, a7, .LBB13_23 +; RV64I-NEXT: bltu s0, t1, .LBB13_23 ; RV64I-NEXT: # %bb.22: -; RV64I-NEXT: li a7, 0 -; RV64I-NEXT: bltu a5, t1, .LBB13_24 +; RV64I-NEXT: li t0, 0 +; RV64I-NEXT: bltu a6, a7, .LBB13_24 ; RV64I-NEXT: j .LBB13_25 ; RV64I-NEXT: .LBB13_23: -; RV64I-NEXT: srl a7, t0, s0 -; RV64I-NEXT: bgeu a5, t1, .LBB13_25 +; RV64I-NEXT: srl t0, t0, s0 +; RV64I-NEXT: bgeu a6, a7, .LBB13_25 ; RV64I-NEXT: .LBB13_24: -; RV64I-NEXT: or a6, t2, t5 -; RV64I-NEXT: or a7, t4, t3 +; RV64I-NEXT: or a3, t2, t3 +; RV64I-NEXT: or t0, t5, t4 ; RV64I-NEXT: .LBB13_25: -; RV64I-NEXT: bnez a5, .LBB13_29 +; RV64I-NEXT: bnez a6, .LBB13_29 ; RV64I-NEXT: # %bb.26: -; RV64I-NEXT: bltu a5, t1, .LBB13_28 +; RV64I-NEXT: bltu a6, a7, .LBB13_28 ; RV64I-NEXT: .LBB13_27: ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: li a0, 0 ; RV64I-NEXT: .LBB13_28: +; RV64I-NEXT: srli a6, a5, 32 +; RV64I-NEXT: lui a3, 16 +; RV64I-NEXT: 
srliw a7, a5, 16 +; RV64I-NEXT: addi a3, a3, -1 +; RV64I-NEXT: srliw t0, a5, 24 +; RV64I-NEXT: and t1, a5, a3 +; RV64I-NEXT: srli t1, t1, 8 +; RV64I-NEXT: sb a5, 0(a2) +; RV64I-NEXT: sb t1, 1(a2) +; RV64I-NEXT: sb a7, 2(a2) +; RV64I-NEXT: sb t0, 3(a2) +; RV64I-NEXT: and a7, a6, a3 +; RV64I-NEXT: srli t0, a5, 48 +; RV64I-NEXT: srli a7, a7, 8 +; RV64I-NEXT: srli a5, a5, 56 +; RV64I-NEXT: sb a6, 4(a2) +; RV64I-NEXT: sb a7, 5(a2) +; RV64I-NEXT: sb t0, 6(a2) +; RV64I-NEXT: sb a5, 7(a2) ; RV64I-NEXT: srli a5, a4, 32 ; RV64I-NEXT: srliw a6, a4, 16 -; RV64I-NEXT: lui t2, 16 -; RV64I-NEXT: srliw t1, a4, 24 -; RV64I-NEXT: srli t0, a4, 48 -; RV64I-NEXT: srli t5, a4, 56 -; RV64I-NEXT: srli a7, a3, 32 -; RV64I-NEXT: srliw t4, a3, 16 -; RV64I-NEXT: srliw s0, a3, 24 -; RV64I-NEXT: srli t6, a3, 48 -; RV64I-NEXT: srli s3, a3, 56 -; RV64I-NEXT: srli t3, a1, 32 -; RV64I-NEXT: srliw s2, a1, 16 -; RV64I-NEXT: srliw s6, a1, 24 -; RV64I-NEXT: srli s4, a1, 48 -; RV64I-NEXT: srli s7, a1, 56 -; RV64I-NEXT: srli s1, a0, 32 -; RV64I-NEXT: srliw s5, a0, 16 -; RV64I-NEXT: srliw s8, a0, 24 -; RV64I-NEXT: srli s9, a0, 48 -; RV64I-NEXT: srli s10, a0, 56 -; RV64I-NEXT: addi t2, t2, -1 -; RV64I-NEXT: and s11, a4, t2 -; RV64I-NEXT: srli s11, s11, 8 -; RV64I-NEXT: sb a4, 0(a2) -; RV64I-NEXT: sb s11, 1(a2) -; RV64I-NEXT: sb a6, 2(a2) -; RV64I-NEXT: sb t1, 3(a2) -; RV64I-NEXT: and a4, a5, t2 -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a5, 4(a2) -; RV64I-NEXT: sb a4, 5(a2) -; RV64I-NEXT: sb t0, 6(a2) -; RV64I-NEXT: sb t5, 7(a2) -; RV64I-NEXT: and a4, a3, t2 -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a3, 8(a2) -; RV64I-NEXT: sb a4, 9(a2) -; RV64I-NEXT: sb t4, 10(a2) -; RV64I-NEXT: sb s0, 11(a2) -; RV64I-NEXT: and a3, a7, t2 -; RV64I-NEXT: srli a3, a3, 8 -; RV64I-NEXT: sb a7, 12(a2) -; RV64I-NEXT: sb a3, 13(a2) -; RV64I-NEXT: sb t6, 14(a2) -; RV64I-NEXT: sb s3, 15(a2) -; RV64I-NEXT: and a3, a1, t2 -; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: and a7, a4, a3 +; RV64I-NEXT: srli a7, a7, 8 +; 
RV64I-NEXT: srliw t0, a4, 24 +; RV64I-NEXT: sb a4, 8(a2) +; RV64I-NEXT: sb a7, 9(a2) +; RV64I-NEXT: sb a6, 10(a2) +; RV64I-NEXT: sb t0, 11(a2) +; RV64I-NEXT: srli a6, a4, 48 +; RV64I-NEXT: and a7, a5, a3 +; RV64I-NEXT: srli a7, a7, 8 +; RV64I-NEXT: srli a4, a4, 56 +; RV64I-NEXT: sb a5, 12(a2) +; RV64I-NEXT: sb a7, 13(a2) +; RV64I-NEXT: sb a6, 14(a2) +; RV64I-NEXT: sb a4, 15(a2) +; RV64I-NEXT: srli a4, a1, 32 +; RV64I-NEXT: and a5, a1, a3 +; RV64I-NEXT: srliw a6, a1, 16 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srliw a7, a1, 24 ; RV64I-NEXT: sb a1, 16(a2) -; RV64I-NEXT: sb a3, 17(a2) -; RV64I-NEXT: sb s2, 18(a2) -; RV64I-NEXT: sb s6, 19(a2) -; RV64I-NEXT: and a1, t3, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb t3, 20(a2) -; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: sb s4, 22(a2) -; RV64I-NEXT: sb s7, 23(a2) -; RV64I-NEXT: and a1, a0, t2 -; RV64I-NEXT: and a3, s1, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: sb a5, 17(a2) +; RV64I-NEXT: sb a6, 18(a2) +; RV64I-NEXT: sb a7, 19(a2) +; RV64I-NEXT: and a5, a4, a3 +; RV64I-NEXT: srli a6, a1, 48 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srli a1, a1, 56 +; RV64I-NEXT: sb a4, 20(a2) +; RV64I-NEXT: sb a5, 21(a2) +; RV64I-NEXT: sb a6, 22(a2) +; RV64I-NEXT: sb a1, 23(a2) +; RV64I-NEXT: srli a1, a0, 32 +; RV64I-NEXT: srliw a4, a0, 16 +; RV64I-NEXT: and a5, a0, a3 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srliw a6, a0, 24 ; RV64I-NEXT: sb a0, 24(a2) -; RV64I-NEXT: sb a1, 25(a2) -; RV64I-NEXT: sb s5, 26(a2) -; RV64I-NEXT: sb s8, 27(a2) -; RV64I-NEXT: sb s1, 28(a2) +; RV64I-NEXT: sb a5, 25(a2) +; RV64I-NEXT: sb a4, 26(a2) +; RV64I-NEXT: sb a6, 27(a2) +; RV64I-NEXT: srli a4, a0, 48 +; RV64I-NEXT: and a3, a1, a3 +; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: sb a1, 28(a2) ; RV64I-NEXT: sb a3, 29(a2) -; RV64I-NEXT: sb s9, 30(a2) -; RV64I-NEXT: sb s10, 31(a2) -; RV64I-NEXT: ld s0, 88(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 80(sp) # 8-byte Folded Reload 
-; RV64I-NEXT: ld s2, 72(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 64(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 56(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s5, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s6, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s7, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s8, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s9, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s10, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s11, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 96 +; RV64I-NEXT: sb a4, 30(a2) +; RV64I-NEXT: sb a0, 31(a2) +; RV64I-NEXT: ld s0, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB13_29: -; RV64I-NEXT: mv a4, a6 -; RV64I-NEXT: mv a3, a7 -; RV64I-NEXT: bgeu a5, t1, .LBB13_27 +; RV64I-NEXT: mv a5, a3 +; RV64I-NEXT: mv a4, t0 +; RV64I-NEXT: bgeu a6, a7, .LBB13_27 ; RV64I-NEXT: j .LBB13_28 ; ; RV32I-LABEL: lshr_32bytes_wordOff: @@ -4150,749 +4096,758 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: sw s9, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s10, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s11, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: lbu a5, 1(a0) -; RV32I-NEXT: lbu a6, 2(a0) -; RV32I-NEXT: lbu a7, 3(a0) -; RV32I-NEXT: lbu t0, 4(a0) -; RV32I-NEXT: lbu t1, 5(a0) -; RV32I-NEXT: lbu t2, 6(a0) -; RV32I-NEXT: lbu t3, 7(a0) -; RV32I-NEXT: lbu t4, 8(a0) -; RV32I-NEXT: lbu t5, 9(a0) +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 3(a0) +; RV32I-NEXT: lbu a5, 0(a0) +; RV32I-NEXT: lbu a7, 2(a0) +; RV32I-NEXT: lbu t0, 5(a0) +; RV32I-NEXT: lbu t1, 7(a0) +; RV32I-NEXT: lbu t2, 4(a0) +; RV32I-NEXT: 
lbu t3, 6(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a3, a3, a5 +; RV32I-NEXT: or a5, a4, a7 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or a4, t0, t2 +; RV32I-NEXT: or a7, t1, t3 +; RV32I-NEXT: lbu t0, 9(a0) +; RV32I-NEXT: lbu t3, 0(a1) +; RV32I-NEXT: lbu t4, 1(a1) +; RV32I-NEXT: lbu t1, 2(a1) +; RV32I-NEXT: lbu t2, 3(a1) +; RV32I-NEXT: lbu t5, 8(a0) ; RV32I-NEXT: lbu t6, 10(a0) ; RV32I-NEXT: lbu s0, 11(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or a3, a5, a3 -; RV32I-NEXT: or a7, a7, a6 -; RV32I-NEXT: or t1, t1, t0 -; RV32I-NEXT: lbu a6, 13(a0) -; RV32I-NEXT: lbu a5, 14(a0) -; RV32I-NEXT: lbu s1, 15(a0) -; RV32I-NEXT: slli t3, t3, 8 -; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: or t3, t3, t2 -; RV32I-NEXT: or t0, t5, t4 -; RV32I-NEXT: or t5, s0, t6 -; RV32I-NEXT: lbu t2, 1(a1) -; RV32I-NEXT: lbu t4, 0(a1) -; RV32I-NEXT: lbu t6, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a1, a7, 16 +; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or t0, t0, t5 ; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or s0, t2, t4 -; RV32I-NEXT: slli t2, s1, 8 -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a1, t6 -; RV32I-NEXT: slli t4, a7, 16 -; RV32I-NEXT: slli a7, t3, 16 -; RV32I-NEXT: slli t3, t5, 16 -; RV32I-NEXT: slli t5, a1, 16 -; RV32I-NEXT: or a1, a7, t1 -; RV32I-NEXT: or a7, t5, s0 -; RV32I-NEXT: slli a7, a7, 5 -; RV32I-NEXT: srli t1, a7, 5 -; RV32I-NEXT: andi t5, a7, 31 +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: or a4, t2, t1 +; RV32I-NEXT: lbu t2, 13(a0) +; RV32I-NEXT: lbu t1, 14(a0) +; RV32I-NEXT: lbu a7, 15(a0) +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: or t3, t4, t3 +; RV32I-NEXT: slli a4, a4, 16 +; RV32I-NEXT: or t4, s0, t6 +; RV32I-NEXT: or a4, a4, t3 +; RV32I-NEXT: slli t3, t4, 16 +; RV32I-NEXT: slli a4, a4, 5 +; RV32I-NEXT: slli s0, a7, 8 
+; RV32I-NEXT: andi t5, a4, 31 +; RV32I-NEXT: srli a7, a4, 5 ; RV32I-NEXT: neg s3, t5 ; RV32I-NEXT: beqz t5, .LBB13_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sll a4, a1, s3 +; RV32I-NEXT: sll a6, a1, s3 ; RV32I-NEXT: .LBB13_2: -; RV32I-NEXT: or s7, t4, a3 ; RV32I-NEXT: lbu t4, 12(a0) ; RV32I-NEXT: lbu t6, 19(a0) -; RV32I-NEXT: slli s1, a6, 8 -; RV32I-NEXT: or a5, t2, a5 +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t1, s0, t1 ; RV32I-NEXT: or a3, t3, t0 -; RV32I-NEXT: beqz t1, .LBB13_4 +; RV32I-NEXT: sw a5, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: beqz a7, .LBB13_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: li s0, 0 ; RV32I-NEXT: j .LBB13_5 ; RV32I-NEXT: .LBB13_4: -; RV32I-NEXT: srl s0, s7, a7 -; RV32I-NEXT: or s0, s0, a4 +; RV32I-NEXT: srl t0, a5, a4 +; RV32I-NEXT: or s0, t0, a6 ; RV32I-NEXT: .LBB13_5: -; RV32I-NEXT: li a6, 0 -; RV32I-NEXT: lbu t0, 17(a0) -; RV32I-NEXT: lbu a4, 18(a0) -; RV32I-NEXT: slli s4, t6, 8 -; RV32I-NEXT: or s2, s1, t4 -; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: lbu s2, 17(a0) +; RV32I-NEXT: lbu a6, 18(a0) +; RV32I-NEXT: slli s1, t6, 8 +; RV32I-NEXT: or s4, t2, t4 +; RV32I-NEXT: slli t1, t1, 16 ; RV32I-NEXT: li s5, 1 ; RV32I-NEXT: sll t6, a3, s3 ; RV32I-NEXT: beqz t5, .LBB13_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: mv a6, t6 +; RV32I-NEXT: mv t0, t6 ; RV32I-NEXT: .LBB13_7: ; RV32I-NEXT: lbu t2, 16(a0) ; RV32I-NEXT: lbu t3, 23(a0) -; RV32I-NEXT: slli s1, t0, 8 -; RV32I-NEXT: or t4, s4, a4 -; RV32I-NEXT: srl a4, a1, a7 -; RV32I-NEXT: or a5, a5, s2 -; RV32I-NEXT: bne t1, s5, .LBB13_9 +; RV32I-NEXT: slli s2, s2, 8 +; RV32I-NEXT: or t4, s1, a6 +; RV32I-NEXT: srl s1, a1, a4 +; RV32I-NEXT: or a6, t1, s4 +; RV32I-NEXT: bne a7, s5, .LBB13_9 ; RV32I-NEXT: # %bb.8: -; RV32I-NEXT: or s0, a4, a6 +; RV32I-NEXT: or s0, s1, t0 ; RV32I-NEXT: .LBB13_9: -; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: lbu s5, 21(a0) -; RV32I-NEXT: lbu a6, 22(a0) -; RV32I-NEXT: slli s4, t3, 8 -; RV32I-NEXT: or t2, s1, t2 -; RV32I-NEXT: 
slli s6, t4, 16 +; RV32I-NEXT: li t1, 0 +; RV32I-NEXT: lbu s6, 21(a0) +; RV32I-NEXT: lbu t0, 22(a0) +; RV32I-NEXT: slli s5, t3, 8 +; RV32I-NEXT: or t2, s2, t2 +; RV32I-NEXT: slli s7, t4, 16 ; RV32I-NEXT: li s8, 2 -; RV32I-NEXT: sll t3, a5, s3 +; RV32I-NEXT: sll t3, a6, s3 ; RV32I-NEXT: beqz t5, .LBB13_11 ; RV32I-NEXT: # %bb.10: -; RV32I-NEXT: mv t0, t3 +; RV32I-NEXT: mv t1, t3 ; RV32I-NEXT: .LBB13_11: -; RV32I-NEXT: lbu s1, 20(a0) -; RV32I-NEXT: lbu s2, 27(a0) -; RV32I-NEXT: slli s5, s5, 8 -; RV32I-NEXT: or s4, s4, a6 -; RV32I-NEXT: srl t4, a3, a7 -; RV32I-NEXT: or a6, s6, t2 -; RV32I-NEXT: bne t1, s8, .LBB13_13 +; RV32I-NEXT: lbu s2, 20(a0) +; RV32I-NEXT: lbu s4, 27(a0) +; RV32I-NEXT: slli s6, s6, 8 +; RV32I-NEXT: or s5, s5, t0 +; RV32I-NEXT: srl t4, a3, a4 +; RV32I-NEXT: or t0, s7, t2 +; RV32I-NEXT: bne a7, s8, .LBB13_13 ; RV32I-NEXT: # %bb.12: -; RV32I-NEXT: or s0, t4, t0 +; RV32I-NEXT: or s0, t4, t1 ; RV32I-NEXT: .LBB13_13: -; RV32I-NEXT: sw s7, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: li t2, 0 -; RV32I-NEXT: lbu s6, 25(a0) -; RV32I-NEXT: lbu t0, 26(a0) -; RV32I-NEXT: slli s8, s2, 8 -; RV32I-NEXT: or s7, s5, s1 -; RV32I-NEXT: slli s9, s4, 16 -; RV32I-NEXT: sll s11, a6, s3 +; RV32I-NEXT: lbu s7, 25(a0) +; RV32I-NEXT: lbu t1, 26(a0) +; RV32I-NEXT: slli s9, s4, 8 +; RV32I-NEXT: or s8, s6, s2 +; RV32I-NEXT: slli s10, s5, 16 +; RV32I-NEXT: sll s11, t0, s3 ; RV32I-NEXT: beqz t5, .LBB13_15 ; RV32I-NEXT: # %bb.14: ; RV32I-NEXT: mv t2, s11 ; RV32I-NEXT: .LBB13_15: -; RV32I-NEXT: lbu s1, 24(a0) -; RV32I-NEXT: lbu s2, 31(a0) -; RV32I-NEXT: slli s5, s6, 8 -; RV32I-NEXT: or s4, s8, t0 -; RV32I-NEXT: srl ra, a5, a7 -; RV32I-NEXT: or t0, s9, s7 -; RV32I-NEXT: li s6, 3 -; RV32I-NEXT: bne t1, s6, .LBB13_17 +; RV32I-NEXT: lbu s2, 24(a0) +; RV32I-NEXT: lbu s4, 31(a0) +; RV32I-NEXT: slli s6, s7, 8 +; RV32I-NEXT: or s5, s9, t1 +; RV32I-NEXT: srl ra, a6, a4 +; RV32I-NEXT: or t1, s10, s8 +; RV32I-NEXT: li a5, 3 +; RV32I-NEXT: bne a7, a5, .LBB13_17 ; RV32I-NEXT: # %bb.16: ; 
RV32I-NEXT: or s0, ra, t2 ; RV32I-NEXT: .LBB13_17: ; RV32I-NEXT: li t2, 0 -; RV32I-NEXT: lbu s7, 29(a0) -; RV32I-NEXT: lbu s6, 30(a0) -; RV32I-NEXT: slli s8, s2, 8 -; RV32I-NEXT: or s2, s5, s1 -; RV32I-NEXT: slli s5, s4, 16 -; RV32I-NEXT: li s9, 4 -; RV32I-NEXT: sll s1, t0, s3 -; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu s8, 29(a0) +; RV32I-NEXT: lbu s7, 30(a0) +; RV32I-NEXT: slli s10, s4, 8 +; RV32I-NEXT: or s4, s6, s2 +; RV32I-NEXT: slli s6, s5, 16 +; RV32I-NEXT: li a5, 4 +; RV32I-NEXT: sll s2, t1, s3 +; RV32I-NEXT: sw s2, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: beqz t5, .LBB13_19 ; RV32I-NEXT: # %bb.18: ; RV32I-NEXT: lw t2, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB13_19: -; RV32I-NEXT: lbu s1, 28(a0) -; RV32I-NEXT: slli s7, s7, 8 -; RV32I-NEXT: or s4, s8, s6 -; RV32I-NEXT: srl s10, a6, a7 -; RV32I-NEXT: or a0, s5, s2 -; RV32I-NEXT: bne t1, s9, .LBB13_21 +; RV32I-NEXT: lbu s2, 28(a0) +; RV32I-NEXT: slli s8, s8, 8 +; RV32I-NEXT: or s5, s10, s7 +; RV32I-NEXT: srl s10, t0, a4 +; RV32I-NEXT: or a0, s6, s4 +; RV32I-NEXT: bne a7, a5, .LBB13_21 ; RV32I-NEXT: # %bb.20: ; RV32I-NEXT: or s0, s10, t2 ; RV32I-NEXT: .LBB13_21: -; RV32I-NEXT: li s2, 0 -; RV32I-NEXT: or t2, s7, s1 -; RV32I-NEXT: slli s4, s4, 16 -; RV32I-NEXT: li s9, 5 +; RV32I-NEXT: li s4, 0 +; RV32I-NEXT: or t2, s8, s2 +; RV32I-NEXT: slli s5, s5, 16 +; RV32I-NEXT: li s8, 5 ; RV32I-NEXT: sll s7, a0, s3 ; RV32I-NEXT: beqz t5, .LBB13_23 ; RV32I-NEXT: # %bb.22: -; RV32I-NEXT: mv s2, s7 +; RV32I-NEXT: mv s4, s7 ; RV32I-NEXT: .LBB13_23: -; RV32I-NEXT: srl s8, t0, a7 -; RV32I-NEXT: or t2, s4, t2 -; RV32I-NEXT: bne t1, s9, .LBB13_25 +; RV32I-NEXT: srl a5, t1, a4 +; RV32I-NEXT: or t2, s5, t2 +; RV32I-NEXT: beq a7, s8, .LBB13_25 ; RV32I-NEXT: # %bb.24: -; RV32I-NEXT: or s0, s8, s2 +; RV32I-NEXT: mv s9, a5 +; RV32I-NEXT: j .LBB13_26 ; RV32I-NEXT: .LBB13_25: +; RV32I-NEXT: mv s9, a5 +; RV32I-NEXT: or s0, a5, s4 +; RV32I-NEXT: .LBB13_26: ; RV32I-NEXT: li s4, 0 -; RV32I-NEXT: li s2, 6 +; 
RV32I-NEXT: li a5, 6 ; RV32I-NEXT: sll s5, t2, s3 -; RV32I-NEXT: beqz t5, .LBB13_27 -; RV32I-NEXT: # %bb.26: +; RV32I-NEXT: beqz t5, .LBB13_28 +; RV32I-NEXT: # %bb.27: ; RV32I-NEXT: mv s4, s5 -; RV32I-NEXT: .LBB13_27: -; RV32I-NEXT: srl s6, a0, a7 -; RV32I-NEXT: bne t1, s2, .LBB13_29 -; RV32I-NEXT: # %bb.28: +; RV32I-NEXT: .LBB13_28: +; RV32I-NEXT: srl s6, a0, a4 +; RV32I-NEXT: bne a7, a5, .LBB13_30 +; RV32I-NEXT: # %bb.29: ; RV32I-NEXT: or s0, s6, s4 -; RV32I-NEXT: .LBB13_29: +; RV32I-NEXT: .LBB13_30: ; RV32I-NEXT: li s3, 7 -; RV32I-NEXT: srl s1, t2, a7 -; RV32I-NEXT: mv s4, s1 -; RV32I-NEXT: bne t1, s3, .LBB13_34 -; RV32I-NEXT: # %bb.30: -; RV32I-NEXT: bnez a7, .LBB13_35 -; RV32I-NEXT: .LBB13_31: -; RV32I-NEXT: li s0, 0 -; RV32I-NEXT: bnez t5, .LBB13_36 +; RV32I-NEXT: srl s2, t2, a4 +; RV32I-NEXT: mv s4, s2 +; RV32I-NEXT: bne a7, s3, .LBB13_35 +; RV32I-NEXT: # %bb.31: +; RV32I-NEXT: bnez a4, .LBB13_36 ; RV32I-NEXT: .LBB13_32: -; RV32I-NEXT: li s4, 2 -; RV32I-NEXT: beqz t1, .LBB13_37 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t5, .LBB13_37 ; RV32I-NEXT: .LBB13_33: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: j .LBB13_38 +; RV32I-NEXT: li s4, 2 +; RV32I-NEXT: beqz a7, .LBB13_38 ; RV32I-NEXT: .LBB13_34: -; RV32I-NEXT: mv s4, s0 -; RV32I-NEXT: beqz a7, .LBB13_31 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: j .LBB13_39 ; RV32I-NEXT: .LBB13_35: -; RV32I-NEXT: sw s4, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: li s0, 0 -; RV32I-NEXT: beqz t5, .LBB13_32 +; RV32I-NEXT: mv s4, s0 +; RV32I-NEXT: beqz a4, .LBB13_32 ; RV32I-NEXT: .LBB13_36: +; RV32I-NEXT: sw s4, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t5, .LBB13_33 +; RV32I-NEXT: .LBB13_37: ; RV32I-NEXT: mv s0, t6 ; RV32I-NEXT: li s4, 2 -; RV32I-NEXT: bnez t1, .LBB13_33 -; RV32I-NEXT: .LBB13_37: -; RV32I-NEXT: or a4, a4, s0 +; RV32I-NEXT: bnez a7, .LBB13_34 ; RV32I-NEXT: .LBB13_38: -; RV32I-NEXT: li s0, 1 -; RV32I-NEXT: li t6, 0 -; RV32I-NEXT: bnez t5, .LBB13_57 -; RV32I-NEXT: # %bb.39: -; RV32I-NEXT: beq 
t1, s0, .LBB13_58 -; RV32I-NEXT: .LBB13_40: -; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: or t6, s1, s0 +; RV32I-NEXT: .LBB13_39: +; RV32I-NEXT: li s1, 1 +; RV32I-NEXT: li s0, 0 ; RV32I-NEXT: bnez t5, .LBB13_59 +; RV32I-NEXT: # %bb.40: +; RV32I-NEXT: beq a7, s1, .LBB13_60 ; RV32I-NEXT: .LBB13_41: -; RV32I-NEXT: beq t1, s4, .LBB13_60 -; RV32I-NEXT: .LBB13_42: -; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: li s0, 0 ; RV32I-NEXT: bnez t5, .LBB13_61 +; RV32I-NEXT: .LBB13_42: +; RV32I-NEXT: beq a7, s4, .LBB13_62 ; RV32I-NEXT: .LBB13_43: -; RV32I-NEXT: li s4, 3 -; RV32I-NEXT: bne t1, s4, .LBB13_45 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t5, .LBB13_63 ; RV32I-NEXT: .LBB13_44: -; RV32I-NEXT: or a4, s10, t6 +; RV32I-NEXT: li a5, 3 +; RV32I-NEXT: bne a7, a5, .LBB13_46 ; RV32I-NEXT: .LBB13_45: -; RV32I-NEXT: li t6, 0 -; RV32I-NEXT: li s4, 4 -; RV32I-NEXT: bnez t5, .LBB13_62 -; RV32I-NEXT: # %bb.46: -; RV32I-NEXT: beq t1, s4, .LBB13_63 -; RV32I-NEXT: .LBB13_47: -; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: or t6, s10, s0 +; RV32I-NEXT: .LBB13_46: +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: li a5, 4 ; RV32I-NEXT: bnez t5, .LBB13_64 +; RV32I-NEXT: # %bb.47: +; RV32I-NEXT: beq a7, a5, .LBB13_65 ; RV32I-NEXT: .LBB13_48: -; RV32I-NEXT: beq t1, s9, .LBB13_65 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t5, .LBB13_66 ; RV32I-NEXT: .LBB13_49: -; RV32I-NEXT: mv t6, s1 -; RV32I-NEXT: bne t1, s2, .LBB13_66 +; RV32I-NEXT: bne a7, s8, .LBB13_51 ; RV32I-NEXT: .LBB13_50: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s3, .LBB13_67 +; RV32I-NEXT: or t6, s6, s0 ; RV32I-NEXT: .LBB13_51: -; RV32I-NEXT: beqz a7, .LBB13_53 -; RV32I-NEXT: .LBB13_52: -; RV32I-NEXT: mv a1, a4 +; RV32I-NEXT: mv s0, s2 +; RV32I-NEXT: li s8, 6 +; RV32I-NEXT: bne a7, s8, .LBB13_67 +; RV32I-NEXT: # %bb.52: +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: bne a7, s3, .LBB13_68 ; RV32I-NEXT: .LBB13_53: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: li t6, 2 -; RV32I-NEXT: beqz t5, .LBB13_55 -; RV32I-NEXT: # %bb.54: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: beqz a4, 
.LBB13_55 +; RV32I-NEXT: .LBB13_54: +; RV32I-NEXT: mv a1, t6 ; RV32I-NEXT: .LBB13_55: -; RV32I-NEXT: beqz t1, .LBB13_68 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: li s0, 5 +; RV32I-NEXT: beqz t5, .LBB13_57 ; RV32I-NEXT: # %bb.56: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: j .LBB13_69 -; RV32I-NEXT: .LBB13_57: ; RV32I-NEXT: mv t6, t3 -; RV32I-NEXT: bne t1, s0, .LBB13_40 -; RV32I-NEXT: .LBB13_58: -; RV32I-NEXT: or a4, t4, t6 -; RV32I-NEXT: li t6, 0 -; RV32I-NEXT: beqz t5, .LBB13_41 +; RV32I-NEXT: .LBB13_57: +; RV32I-NEXT: beqz a7, .LBB13_69 +; RV32I-NEXT: # %bb.58: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: j .LBB13_70 ; RV32I-NEXT: .LBB13_59: -; RV32I-NEXT: mv t6, s11 -; RV32I-NEXT: bne t1, s4, .LBB13_42 +; RV32I-NEXT: mv s0, t3 +; RV32I-NEXT: bne a7, s1, .LBB13_41 ; RV32I-NEXT: .LBB13_60: -; RV32I-NEXT: or a4, ra, t6 -; RV32I-NEXT: li t6, 0 -; RV32I-NEXT: beqz t5, .LBB13_43 +; RV32I-NEXT: or t6, t4, s0 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t5, .LBB13_42 ; RV32I-NEXT: .LBB13_61: -; RV32I-NEXT: lw t6, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: li s4, 3 -; RV32I-NEXT: beq t1, s4, .LBB13_44 -; RV32I-NEXT: j .LBB13_45 +; RV32I-NEXT: mv s0, s11 +; RV32I-NEXT: bne a7, s4, .LBB13_43 ; RV32I-NEXT: .LBB13_62: -; RV32I-NEXT: mv t6, s7 -; RV32I-NEXT: bne t1, s4, .LBB13_47 +; RV32I-NEXT: or t6, ra, s0 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t5, .LBB13_44 ; RV32I-NEXT: .LBB13_63: -; RV32I-NEXT: or a4, s8, t6 -; RV32I-NEXT: li t6, 0 -; RV32I-NEXT: beqz t5, .LBB13_48 +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: li a5, 3 +; RV32I-NEXT: beq a7, a5, .LBB13_45 +; RV32I-NEXT: j .LBB13_46 ; RV32I-NEXT: .LBB13_64: -; RV32I-NEXT: mv t6, s5 -; RV32I-NEXT: bne t1, s9, .LBB13_49 +; RV32I-NEXT: mv s0, s7 +; RV32I-NEXT: bne a7, a5, .LBB13_48 ; RV32I-NEXT: .LBB13_65: -; RV32I-NEXT: or a4, s6, t6 -; RV32I-NEXT: mv t6, s1 -; RV32I-NEXT: beq t1, s2, .LBB13_50 +; RV32I-NEXT: or t6, s9, s0 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t5, .LBB13_49 ; RV32I-NEXT: .LBB13_66: -; 
RV32I-NEXT: mv t6, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s3, .LBB13_51 +; RV32I-NEXT: mv s0, s5 +; RV32I-NEXT: beq a7, s8, .LBB13_50 +; RV32I-NEXT: j .LBB13_51 ; RV32I-NEXT: .LBB13_67: -; RV32I-NEXT: mv a4, t6 -; RV32I-NEXT: bnez a7, .LBB13_52 -; RV32I-NEXT: j .LBB13_53 +; RV32I-NEXT: mv s0, t6 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: beq a7, s3, .LBB13_53 ; RV32I-NEXT: .LBB13_68: -; RV32I-NEXT: or a4, t4, a4 +; RV32I-NEXT: mv t6, s0 +; RV32I-NEXT: bnez a4, .LBB13_54 +; RV32I-NEXT: j .LBB13_55 ; RV32I-NEXT: .LBB13_69: -; RV32I-NEXT: li t4, 3 -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bnez t5, .LBB13_84 -; RV32I-NEXT: # %bb.70: -; RV32I-NEXT: beq t1, s0, .LBB13_85 -; RV32I-NEXT: .LBB13_71: -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bnez t5, .LBB13_86 +; RV32I-NEXT: or t3, t4, t6 +; RV32I-NEXT: .LBB13_70: +; RV32I-NEXT: li t6, 3 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bnez t5, .LBB13_85 +; RV32I-NEXT: # %bb.71: +; RV32I-NEXT: beq a7, s1, .LBB13_86 ; RV32I-NEXT: .LBB13_72: -; RV32I-NEXT: beq t1, t6, .LBB13_87 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bnez t5, .LBB13_87 ; RV32I-NEXT: .LBB13_73: -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bnez t5, .LBB13_88 +; RV32I-NEXT: beq a7, s4, .LBB13_88 ; RV32I-NEXT: .LBB13_74: -; RV32I-NEXT: beq t1, t4, .LBB13_89 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bnez t5, .LBB13_89 ; RV32I-NEXT: .LBB13_75: -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bnez t5, .LBB13_90 +; RV32I-NEXT: beq a7, t6, .LBB13_90 ; RV32I-NEXT: .LBB13_76: -; RV32I-NEXT: beq t1, s4, .LBB13_91 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bnez t5, .LBB13_91 ; RV32I-NEXT: .LBB13_77: -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: bne t1, s9, .LBB13_92 +; RV32I-NEXT: beq a7, a5, .LBB13_92 ; RV32I-NEXT: .LBB13_78: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s2, .LBB13_93 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: bne a7, s0, .LBB13_93 ; RV32I-NEXT: .LBB13_79: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s3, .LBB13_94 +; RV32I-NEXT: bne a7, s8, .LBB13_94 ; RV32I-NEXT: .LBB13_80: -; RV32I-NEXT: bnez 
a7, .LBB13_95 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s3, .LBB13_95 ; RV32I-NEXT: .LBB13_81: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bnez t5, .LBB13_96 +; RV32I-NEXT: bnez a4, .LBB13_96 ; RV32I-NEXT: .LBB13_82: -; RV32I-NEXT: beqz t1, .LBB13_97 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB13_97 ; RV32I-NEXT: .LBB13_83: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: j .LBB13_98 +; RV32I-NEXT: beqz a7, .LBB13_98 ; RV32I-NEXT: .LBB13_84: -; RV32I-NEXT: mv t3, s11 -; RV32I-NEXT: bne t1, s0, .LBB13_71 -; RV32I-NEXT: .LBB13_85: -; RV32I-NEXT: or a4, ra, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beqz t5, .LBB13_72 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bnez t5, .LBB13_99 +; RV32I-NEXT: j .LBB13_100 +; RV32I-NEXT: .LBB13_85: +; RV32I-NEXT: mv t4, s11 +; RV32I-NEXT: bne a7, s1, .LBB13_72 ; RV32I-NEXT: .LBB13_86: -; RV32I-NEXT: lw t3, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne t1, t6, .LBB13_73 +; RV32I-NEXT: or t3, ra, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beqz t5, .LBB13_73 ; RV32I-NEXT: .LBB13_87: -; RV32I-NEXT: or a4, s10, t3 -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beqz t5, .LBB13_74 +; RV32I-NEXT: lw t4, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne a7, s4, .LBB13_74 ; RV32I-NEXT: .LBB13_88: -; RV32I-NEXT: mv t3, s7 -; RV32I-NEXT: bne t1, t4, .LBB13_75 +; RV32I-NEXT: or t3, s10, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beqz t5, .LBB13_75 ; RV32I-NEXT: .LBB13_89: -; RV32I-NEXT: or a4, s8, t3 -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beqz t5, .LBB13_76 +; RV32I-NEXT: mv t4, s7 +; RV32I-NEXT: bne a7, t6, .LBB13_76 ; RV32I-NEXT: .LBB13_90: -; RV32I-NEXT: mv t3, s5 -; RV32I-NEXT: bne t1, s4, .LBB13_77 +; RV32I-NEXT: or t3, s9, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beqz t5, .LBB13_77 ; RV32I-NEXT: .LBB13_91: -; RV32I-NEXT: or a4, s6, t3 -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: beq t1, s9, .LBB13_78 +; RV32I-NEXT: mv t4, s5 +; RV32I-NEXT: bne a7, a5, .LBB13_78 ; RV32I-NEXT: .LBB13_92: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s2, 
.LBB13_79 +; RV32I-NEXT: or t3, s6, t4 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: beq a7, s0, .LBB13_79 ; RV32I-NEXT: .LBB13_93: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s3, .LBB13_80 +; RV32I-NEXT: beq a7, s8, .LBB13_80 ; RV32I-NEXT: .LBB13_94: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: beqz a7, .LBB13_81 +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s3, .LBB13_81 ; RV32I-NEXT: .LBB13_95: -; RV32I-NEXT: mv a3, t3 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beqz t5, .LBB13_82 +; RV32I-NEXT: mv t4, t3 +; RV32I-NEXT: beqz a4, .LBB13_82 ; RV32I-NEXT: .LBB13_96: -; RV32I-NEXT: mv a4, s11 -; RV32I-NEXT: bnez t1, .LBB13_83 +; RV32I-NEXT: mv a3, t4 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB13_83 ; RV32I-NEXT: .LBB13_97: -; RV32I-NEXT: or a4, ra, a4 +; RV32I-NEXT: mv t3, s11 +; RV32I-NEXT: bnez a7, .LBB13_84 ; RV32I-NEXT: .LBB13_98: -; RV32I-NEXT: lw ra, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bnez t5, .LBB13_112 -; RV32I-NEXT: # %bb.99: -; RV32I-NEXT: beq t1, s0, .LBB13_113 +; RV32I-NEXT: or t3, ra, t3 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beqz t5, .LBB13_100 +; RV32I-NEXT: .LBB13_99: +; RV32I-NEXT: lw t4, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB13_100: -; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq a7, s1, .LBB13_113 +; RV32I-NEXT: # %bb.101: +; RV32I-NEXT: li t4, 0 ; RV32I-NEXT: bnez t5, .LBB13_114 -; RV32I-NEXT: .LBB13_101: -; RV32I-NEXT: beq t1, t6, .LBB13_115 ; RV32I-NEXT: .LBB13_102: -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bnez t5, .LBB13_116 +; RV32I-NEXT: beq a7, s4, .LBB13_115 ; RV32I-NEXT: .LBB13_103: -; RV32I-NEXT: beq t1, t4, .LBB13_117 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bnez t5, .LBB13_116 ; RV32I-NEXT: .LBB13_104: -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: bne t1, s4, .LBB13_118 +; RV32I-NEXT: beq a7, t6, .LBB13_117 ; RV32I-NEXT: .LBB13_105: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s9, .LBB13_119 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: bne 
a7, a5, .LBB13_118 ; RV32I-NEXT: .LBB13_106: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s2, .LBB13_120 +; RV32I-NEXT: bne a7, s0, .LBB13_119 ; RV32I-NEXT: .LBB13_107: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s3, .LBB13_121 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s8, .LBB13_120 ; RV32I-NEXT: .LBB13_108: -; RV32I-NEXT: bnez a7, .LBB13_122 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne a7, s3, .LBB13_121 ; RV32I-NEXT: .LBB13_109: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bnez t5, .LBB13_123 +; RV32I-NEXT: bnez a4, .LBB13_122 ; RV32I-NEXT: .LBB13_110: -; RV32I-NEXT: beqz t1, .LBB13_124 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB13_123 ; RV32I-NEXT: .LBB13_111: -; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beqz a7, .LBB13_124 +; RV32I-NEXT: .LBB13_112: ; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: li t4, 0 ; RV32I-NEXT: bnez t5, .LBB13_125 ; RV32I-NEXT: j .LBB13_126 -; RV32I-NEXT: .LBB13_112: -; RV32I-NEXT: lw t3, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne t1, s0, .LBB13_100 ; RV32I-NEXT: .LBB13_113: -; RV32I-NEXT: or a4, s10, t3 -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beqz t5, .LBB13_101 +; RV32I-NEXT: or t3, s10, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beqz t5, .LBB13_102 ; RV32I-NEXT: .LBB13_114: -; RV32I-NEXT: mv t3, s7 -; RV32I-NEXT: bne t1, t6, .LBB13_102 +; RV32I-NEXT: mv t4, s7 +; RV32I-NEXT: bne a7, s4, .LBB13_103 ; RV32I-NEXT: .LBB13_115: -; RV32I-NEXT: or a4, s8, t3 -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beqz t5, .LBB13_103 +; RV32I-NEXT: or t3, s9, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beqz t5, .LBB13_104 ; RV32I-NEXT: .LBB13_116: -; RV32I-NEXT: mv t3, s5 -; RV32I-NEXT: bne t1, t4, .LBB13_104 +; RV32I-NEXT: mv t4, s5 +; RV32I-NEXT: bne a7, t6, .LBB13_105 ; RV32I-NEXT: .LBB13_117: -; RV32I-NEXT: or a4, s6, t3 -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: beq t1, s4, .LBB13_105 +; RV32I-NEXT: or t3, s6, t4 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: beq a7, a5, .LBB13_106 ; RV32I-NEXT: .LBB13_118: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; 
RV32I-NEXT: beq t1, s9, .LBB13_106 -; RV32I-NEXT: .LBB13_119: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s2, .LBB13_107 +; RV32I-NEXT: beq a7, s0, .LBB13_107 +; RV32I-NEXT: .LBB13_119: +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s8, .LBB13_108 ; RV32I-NEXT: .LBB13_120: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s3, .LBB13_108 +; RV32I-NEXT: mv t4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq a7, s3, .LBB13_109 ; RV32I-NEXT: .LBB13_121: -; RV32I-NEXT: mv a4, t3 -; RV32I-NEXT: beqz a7, .LBB13_109 +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: beqz a4, .LBB13_110 ; RV32I-NEXT: .LBB13_122: -; RV32I-NEXT: mv a5, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beqz t5, .LBB13_110 +; RV32I-NEXT: mv a6, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB13_111 ; RV32I-NEXT: .LBB13_123: -; RV32I-NEXT: lw a4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: bnez t1, .LBB13_111 +; RV32I-NEXT: lw t3, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bnez a7, .LBB13_112 ; RV32I-NEXT: .LBB13_124: -; RV32I-NEXT: or a4, s10, a4 -; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: or t3, s10, t3 +; RV32I-NEXT: li t4, 0 ; RV32I-NEXT: beqz t5, .LBB13_126 ; RV32I-NEXT: .LBB13_125: -; RV32I-NEXT: mv t3, s7 +; RV32I-NEXT: mv t4, s7 ; RV32I-NEXT: .LBB13_126: -; RV32I-NEXT: beq t1, s0, .LBB13_138 +; RV32I-NEXT: beq a7, s1, .LBB13_138 ; RV32I-NEXT: # %bb.127: -; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: li t4, 0 ; RV32I-NEXT: bnez t5, .LBB13_139 ; RV32I-NEXT: .LBB13_128: -; RV32I-NEXT: beq t1, t6, .LBB13_140 +; RV32I-NEXT: beq a7, s4, .LBB13_140 ; RV32I-NEXT: .LBB13_129: -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: bne t1, t4, .LBB13_141 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: bne a7, t6, .LBB13_141 ; RV32I-NEXT: .LBB13_130: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s4, .LBB13_142 -; RV32I-NEXT: .LBB13_131: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s9, .LBB13_143 +; RV32I-NEXT: bne a7, a5, .LBB13_142 +; RV32I-NEXT: 
.LBB13_131: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s0, .LBB13_143 ; RV32I-NEXT: .LBB13_132: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s2, .LBB13_144 -; RV32I-NEXT: .LBB13_133: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s3, .LBB13_145 +; RV32I-NEXT: bne a7, s8, .LBB13_144 +; RV32I-NEXT: .LBB13_133: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s3, .LBB13_145 ; RV32I-NEXT: .LBB13_134: -; RV32I-NEXT: bnez a7, .LBB13_146 +; RV32I-NEXT: bnez a4, .LBB13_146 ; RV32I-NEXT: .LBB13_135: -; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li t3, 0 ; RV32I-NEXT: bnez t5, .LBB13_147 ; RV32I-NEXT: .LBB13_136: -; RV32I-NEXT: beqz t1, .LBB13_148 +; RV32I-NEXT: beqz a7, .LBB13_148 ; RV32I-NEXT: .LBB13_137: -; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: li t4, 0 ; RV32I-NEXT: bnez t5, .LBB13_149 ; RV32I-NEXT: j .LBB13_150 ; RV32I-NEXT: .LBB13_138: -; RV32I-NEXT: or a4, s8, t3 -; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: or t3, s9, t4 +; RV32I-NEXT: li t4, 0 ; RV32I-NEXT: beqz t5, .LBB13_128 ; RV32I-NEXT: .LBB13_139: -; RV32I-NEXT: mv t3, s5 -; RV32I-NEXT: bne t1, t6, .LBB13_129 +; RV32I-NEXT: mv t4, s5 +; RV32I-NEXT: bne a7, s4, .LBB13_129 ; RV32I-NEXT: .LBB13_140: -; RV32I-NEXT: or a4, s6, t3 -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: beq t1, t4, .LBB13_130 +; RV32I-NEXT: or t3, s6, t4 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: beq a7, t6, .LBB13_130 ; RV32I-NEXT: .LBB13_141: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s4, .LBB13_131 -; RV32I-NEXT: .LBB13_142: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s9, .LBB13_132 +; RV32I-NEXT: beq a7, a5, .LBB13_131 +; RV32I-NEXT: .LBB13_142: +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s0, .LBB13_132 ; RV32I-NEXT: .LBB13_143: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s2, .LBB13_133 -; RV32I-NEXT: .LBB13_144: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq 
t1, s3, .LBB13_134 +; RV32I-NEXT: beq a7, s8, .LBB13_133 +; RV32I-NEXT: .LBB13_144: +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s3, .LBB13_134 ; RV32I-NEXT: .LBB13_145: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: beqz a7, .LBB13_135 +; RV32I-NEXT: mv t4, t3 +; RV32I-NEXT: beqz a4, .LBB13_135 ; RV32I-NEXT: .LBB13_146: -; RV32I-NEXT: mv a6, t3 -; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: mv t0, t4 +; RV32I-NEXT: li t3, 0 ; RV32I-NEXT: beqz t5, .LBB13_136 ; RV32I-NEXT: .LBB13_147: -; RV32I-NEXT: mv a4, s7 -; RV32I-NEXT: bnez t1, .LBB13_137 +; RV32I-NEXT: mv t3, s7 +; RV32I-NEXT: bnez a7, .LBB13_137 ; RV32I-NEXT: .LBB13_148: -; RV32I-NEXT: or a4, s8, a4 -; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: or t3, s9, t3 +; RV32I-NEXT: li t4, 0 ; RV32I-NEXT: beqz t5, .LBB13_150 ; RV32I-NEXT: .LBB13_149: -; RV32I-NEXT: mv t3, s5 +; RV32I-NEXT: mv t4, s5 ; RV32I-NEXT: .LBB13_150: -; RV32I-NEXT: beq t1, s0, .LBB13_161 +; RV32I-NEXT: beq a7, s1, .LBB13_161 ; RV32I-NEXT: # %bb.151: -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: bne t1, t6, .LBB13_162 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: bne a7, s4, .LBB13_162 ; RV32I-NEXT: .LBB13_152: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, t4, .LBB13_163 -; RV32I-NEXT: .LBB13_153: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s4, .LBB13_164 +; RV32I-NEXT: bne a7, t6, .LBB13_163 +; RV32I-NEXT: .LBB13_153: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, a5, .LBB13_164 ; RV32I-NEXT: .LBB13_154: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s9, .LBB13_165 -; RV32I-NEXT: .LBB13_155: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s2, .LBB13_166 +; RV32I-NEXT: bne a7, s0, .LBB13_165 +; RV32I-NEXT: .LBB13_155: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s8, .LBB13_166 ; RV32I-NEXT: .LBB13_156: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s3, .LBB13_167 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne a7, s3, .LBB13_167 ; RV32I-NEXT: .LBB13_157: -; RV32I-NEXT: bnez a7, .LBB13_168 +; RV32I-NEXT: bnez a4, .LBB13_168 ; RV32I-NEXT: .LBB13_158: -; 
RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li t3, 0 ; RV32I-NEXT: bnez t5, .LBB13_169 ; RV32I-NEXT: .LBB13_159: -; RV32I-NEXT: beqz t1, .LBB13_170 +; RV32I-NEXT: beqz a7, .LBB13_170 ; RV32I-NEXT: .LBB13_160: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: bne t1, s0, .LBB13_171 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: bne a7, s1, .LBB13_171 ; RV32I-NEXT: j .LBB13_172 ; RV32I-NEXT: .LBB13_161: -; RV32I-NEXT: or a4, s6, t3 -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: beq t1, t6, .LBB13_152 +; RV32I-NEXT: or t3, s6, t4 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: beq a7, s4, .LBB13_152 ; RV32I-NEXT: .LBB13_162: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, t4, .LBB13_153 -; RV32I-NEXT: .LBB13_163: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s4, .LBB13_154 +; RV32I-NEXT: beq a7, t6, .LBB13_153 +; RV32I-NEXT: .LBB13_163: +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, a5, .LBB13_154 ; RV32I-NEXT: .LBB13_164: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s9, .LBB13_155 -; RV32I-NEXT: .LBB13_165: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s2, .LBB13_156 +; RV32I-NEXT: beq a7, s0, .LBB13_155 +; RV32I-NEXT: .LBB13_165: +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s8, .LBB13_156 ; RV32I-NEXT: .LBB13_166: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s3, .LBB13_157 +; RV32I-NEXT: mv t4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq a7, s3, .LBB13_157 ; RV32I-NEXT: .LBB13_167: -; RV32I-NEXT: mv a4, t3 -; RV32I-NEXT: beqz a7, .LBB13_158 +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: beqz a4, .LBB13_158 ; RV32I-NEXT: .LBB13_168: -; RV32I-NEXT: mv t0, a4 -; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: mv t1, t3 +; RV32I-NEXT: li t3, 0 ; RV32I-NEXT: beqz t5, .LBB13_159 ; RV32I-NEXT: .LBB13_169: -; RV32I-NEXT: mv a4, s5 -; RV32I-NEXT: bnez t1, 
.LBB13_160 +; RV32I-NEXT: mv t3, s5 +; RV32I-NEXT: bnez a7, .LBB13_160 ; RV32I-NEXT: .LBB13_170: -; RV32I-NEXT: or a4, s6, a4 -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: beq t1, s0, .LBB13_172 +; RV32I-NEXT: or t3, s6, t3 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: beq a7, s1, .LBB13_172 ; RV32I-NEXT: .LBB13_171: -; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: .LBB13_172: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, t6, .LBB13_190 -; RV32I-NEXT: # %bb.173: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, t4, .LBB13_191 +; RV32I-NEXT: bne a7, s4, .LBB13_192 +; RV32I-NEXT: # %bb.173: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, t6, .LBB13_193 ; RV32I-NEXT: .LBB13_174: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s4, .LBB13_192 -; RV32I-NEXT: .LBB13_175: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s9, .LBB13_193 +; RV32I-NEXT: bne a7, a5, .LBB13_194 +; RV32I-NEXT: .LBB13_175: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s0, .LBB13_195 ; RV32I-NEXT: .LBB13_176: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s2, .LBB13_194 -; RV32I-NEXT: .LBB13_177: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s3, .LBB13_195 +; RV32I-NEXT: bne a7, s8, .LBB13_196 +; RV32I-NEXT: .LBB13_177: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s3, .LBB13_197 ; RV32I-NEXT: .LBB13_178: -; RV32I-NEXT: bnez a7, .LBB13_196 +; RV32I-NEXT: bnez a4, .LBB13_198 ; RV32I-NEXT: .LBB13_179: -; RV32I-NEXT: bnez t1, .LBB13_197 +; RV32I-NEXT: bnez a7, .LBB13_199 ; RV32I-NEXT: .LBB13_180: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s0, .LBB13_198 -; RV32I-NEXT: .LBB13_181: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, t6, .LBB13_199 +; RV32I-NEXT: bne a7, s1, .LBB13_200 +; RV32I-NEXT: .LBB13_181: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s4, .LBB13_183 ; RV32I-NEXT: .LBB13_182: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, t4, .LBB13_200 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: .LBB13_183: +; RV32I-NEXT: li t5, 4 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s4, .LBB13_201 -; RV32I-NEXT: 
.LBB13_184: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s9, .LBB13_202 +; RV32I-NEXT: beq a7, t6, .LBB13_185 +; RV32I-NEXT: # %bb.184: +; RV32I-NEXT: mv t3, t4 ; RV32I-NEXT: .LBB13_185: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: lw a5, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne a7, t5, .LBB13_201 +; RV32I-NEXT: # %bb.186: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s2, .LBB13_203 -; RV32I-NEXT: .LBB13_186: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s3, .LBB13_204 +; RV32I-NEXT: bne a7, s0, .LBB13_202 ; RV32I-NEXT: .LBB13_187: -; RV32I-NEXT: beqz a7, .LBB13_189 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s8, .LBB13_203 ; RV32I-NEXT: .LBB13_188: -; RV32I-NEXT: mv t2, a4 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne a7, s3, .LBB13_204 ; RV32I-NEXT: .LBB13_189: -; RV32I-NEXT: srli a4, ra, 16 -; RV32I-NEXT: lui t4, 16 -; RV32I-NEXT: srli t3, ra, 24 -; RV32I-NEXT: srli a7, a1, 16 -; RV32I-NEXT: srli t6, a1, 24 -; RV32I-NEXT: srli t1, a3, 16 -; RV32I-NEXT: srli s2, a3, 24 -; RV32I-NEXT: srli t5, a5, 16 -; RV32I-NEXT: srli s3, a5, 24 -; RV32I-NEXT: srli s1, a6, 16 -; RV32I-NEXT: srli s6, a6, 24 -; RV32I-NEXT: srli s0, t0, 16 -; RV32I-NEXT: srli s5, t0, 24 -; RV32I-NEXT: srli s4, a0, 16 -; RV32I-NEXT: srli s7, a0, 24 -; RV32I-NEXT: srli s8, t2, 16 -; RV32I-NEXT: srli s9, t2, 24 -; RV32I-NEXT: addi t4, t4, -1 -; RV32I-NEXT: and s10, ra, t4 -; RV32I-NEXT: and s11, a1, t4 -; RV32I-NEXT: srli s10, s10, 8 -; RV32I-NEXT: sb ra, 0(a2) -; RV32I-NEXT: sb s10, 1(a2) -; RV32I-NEXT: sb a4, 2(a2) -; RV32I-NEXT: sb t3, 3(a2) -; RV32I-NEXT: and a4, a3, t4 -; RV32I-NEXT: srli t3, s11, 8 +; RV32I-NEXT: beqz a4, .LBB13_191 +; RV32I-NEXT: .LBB13_190: +; RV32I-NEXT: mv t2, t3 +; RV32I-NEXT: .LBB13_191: +; RV32I-NEXT: lui a4, 16 +; RV32I-NEXT: addi a4, a4, -1 +; RV32I-NEXT: srli a7, a5, 16 +; RV32I-NEXT: and t3, a5, a4 +; RV32I-NEXT: srli t4, a5, 24 +; RV32I-NEXT: srli t3, t3, 8 +; RV32I-NEXT: sb a5, 0(a2) +; RV32I-NEXT: sb t3, 1(a2) +; RV32I-NEXT: sb a7, 2(a2) +; RV32I-NEXT: sb t4, 
3(a2) +; RV32I-NEXT: srli a5, a1, 16 +; RV32I-NEXT: and a7, a1, a4 +; RV32I-NEXT: srli a7, a7, 8 +; RV32I-NEXT: srli t3, a1, 24 ; RV32I-NEXT: sb a1, 4(a2) -; RV32I-NEXT: sb t3, 5(a2) -; RV32I-NEXT: sb a7, 6(a2) -; RV32I-NEXT: sb t6, 7(a2) -; RV32I-NEXT: and a1, a5, t4 -; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: sb a7, 5(a2) +; RV32I-NEXT: sb a5, 6(a2) +; RV32I-NEXT: sb t3, 7(a2) +; RV32I-NEXT: srli a1, a3, 16 +; RV32I-NEXT: and a5, a3, a4 +; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: srli a7, a3, 24 ; RV32I-NEXT: sb a3, 8(a2) -; RV32I-NEXT: sb a4, 9(a2) -; RV32I-NEXT: sb t1, 10(a2) -; RV32I-NEXT: sb s2, 11(a2) -; RV32I-NEXT: and a3, a6, t4 -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a5, 12(a2) -; RV32I-NEXT: sb a1, 13(a2) -; RV32I-NEXT: sb t5, 14(a2) -; RV32I-NEXT: sb s3, 15(a2) -; RV32I-NEXT: and a1, t0, t4 +; RV32I-NEXT: sb a5, 9(a2) +; RV32I-NEXT: sb a1, 10(a2) +; RV32I-NEXT: sb a7, 11(a2) +; RV32I-NEXT: srli a1, a6, 16 +; RV32I-NEXT: and a3, a6, a4 +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: srli a5, a6, 24 +; RV32I-NEXT: sb a6, 12(a2) +; RV32I-NEXT: sb a3, 13(a2) +; RV32I-NEXT: sb a1, 14(a2) +; RV32I-NEXT: sb a5, 15(a2) +; RV32I-NEXT: srli a1, t0, 16 +; RV32I-NEXT: and a3, t0, a4 ; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a6, 16(a2) +; RV32I-NEXT: srli a5, t0, 24 +; RV32I-NEXT: sb t0, 16(a2) ; RV32I-NEXT: sb a3, 17(a2) -; RV32I-NEXT: sb s1, 18(a2) -; RV32I-NEXT: sb s6, 19(a2) -; RV32I-NEXT: and a3, a0, t4 -; RV32I-NEXT: and a4, t2, t4 -; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a1, 18(a2) +; RV32I-NEXT: sb a5, 19(a2) +; RV32I-NEXT: srli a1, t1, 16 +; RV32I-NEXT: and a3, t1, a4 ; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb t0, 20(a2) -; RV32I-NEXT: sb a1, 21(a2) -; RV32I-NEXT: sb s0, 22(a2) -; RV32I-NEXT: sb s5, 23(a2) +; RV32I-NEXT: srli a5, t1, 24 +; RV32I-NEXT: sb t1, 20(a2) +; RV32I-NEXT: sb a3, 21(a2) +; RV32I-NEXT: sb a1, 22(a2) +; RV32I-NEXT: sb a5, 23(a2) +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: and a3, 
a0, a4 +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: srli a5, a0, 24 ; RV32I-NEXT: sb a0, 24(a2) ; RV32I-NEXT: sb a3, 25(a2) -; RV32I-NEXT: sb s4, 26(a2) -; RV32I-NEXT: sb s7, 27(a2) +; RV32I-NEXT: sb a1, 26(a2) +; RV32I-NEXT: sb a5, 27(a2) +; RV32I-NEXT: srli a0, t2, 16 +; RV32I-NEXT: and a1, t2, a4 +; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: srli a3, t2, 24 ; RV32I-NEXT: sb t2, 28(a2) -; RV32I-NEXT: sb a4, 29(a2) -; RV32I-NEXT: sb s8, 30(a2) -; RV32I-NEXT: sb s9, 31(a2) +; RV32I-NEXT: sb a1, 29(a2) +; RV32I-NEXT: sb a0, 30(a2) +; RV32I-NEXT: sb a3, 31(a2) ; RV32I-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 56(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 52(sp) # 4-byte Folded Reload @@ -4908,64 +4863,57 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: lw s11, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB13_190: -; RV32I-NEXT: mv a4, t3 -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, t4, .LBB13_174 -; RV32I-NEXT: .LBB13_191: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s4, .LBB13_175 ; RV32I-NEXT: .LBB13_192: -; RV32I-NEXT: mv a4, t3 -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s9, .LBB13_176 +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, t6, .LBB13_174 ; RV32I-NEXT: .LBB13_193: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s2, .LBB13_177 -; RV32I-NEXT: .LBB13_194: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s3, .LBB13_178 +; RV32I-NEXT: beq a7, a5, .LBB13_175 +; RV32I-NEXT: .LBB13_194: +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s0, .LBB13_176 ; RV32I-NEXT: .LBB13_195: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: beqz a7, .LBB13_179 +; RV32I-NEXT: mv t4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq a7, s8, .LBB13_177 ; RV32I-NEXT: .LBB13_196: -; RV32I-NEXT: mv a0, t3 -; RV32I-NEXT: beqz 
t1, .LBB13_180 +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s3, .LBB13_178 ; RV32I-NEXT: .LBB13_197: -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s0, .LBB13_181 +; RV32I-NEXT: mv t4, t3 +; RV32I-NEXT: beqz a4, .LBB13_179 ; RV32I-NEXT: .LBB13_198: -; RV32I-NEXT: mv a4, s1 -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, t6, .LBB13_182 +; RV32I-NEXT: mv a0, t4 +; RV32I-NEXT: beqz a7, .LBB13_180 ; RV32I-NEXT: .LBB13_199: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, t4, .LBB13_183 -; RV32I-NEXT: .LBB13_200: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li s2, 0 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s4, .LBB13_184 +; RV32I-NEXT: beq a7, s1, .LBB13_181 +; RV32I-NEXT: .LBB13_200: +; RV32I-NEXT: mv t3, s2 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s4, .LBB13_182 +; RV32I-NEXT: j .LBB13_183 ; RV32I-NEXT: .LBB13_201: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s9, .LBB13_185 -; RV32I-NEXT: .LBB13_202: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s2, .LBB13_186 +; RV32I-NEXT: beq a7, s0, .LBB13_187 +; RV32I-NEXT: .LBB13_202: +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s8, .LBB13_188 ; RV32I-NEXT: .LBB13_203: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s3, .LBB13_187 +; RV32I-NEXT: mv t4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq a7, s3, .LBB13_189 ; RV32I-NEXT: .LBB13_204: -; RV32I-NEXT: mv a4, t3 -; RV32I-NEXT: bnez a7, .LBB13_188 -; RV32I-NEXT: j .LBB13_189 +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: bnez a4, .LBB13_190 +; RV32I-NEXT: j .LBB13_191 %src = load i256, ptr %src.ptr, align 1 %wordOff = load i256, ptr %wordOff.ptr, align 1 %bitOff = shl i256 %wordOff, 5 @@ -4977,311 +4925,297 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nounwind { ; 
RV64I-LABEL: lshr_32bytes_dwordOff: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -96 -; RV64I-NEXT: sd s0, 88(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 80(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 72(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 64(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s5, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s6, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s7, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s8, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s9, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s10, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s11, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: lbu a4, 1(a0) -; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: lbu a6, 3(a0) -; RV64I-NEXT: lbu a7, 4(a0) +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd s0, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a5, 3(a0) +; RV64I-NEXT: lbu a6, 2(a0) +; RV64I-NEXT: lbu a7, 7(a0) ; RV64I-NEXT: lbu t0, 5(a0) ; RV64I-NEXT: lbu t1, 6(a0) -; RV64I-NEXT: lbu t2, 7(a0) -; RV64I-NEXT: lbu t3, 8(a0) -; RV64I-NEXT: lbu t4, 9(a0) -; RV64I-NEXT: lbu t5, 10(a0) -; RV64I-NEXT: lbu t6, 11(a0) -; RV64I-NEXT: lbu s0, 12(a0) -; RV64I-NEXT: lbu s1, 13(a0) -; RV64I-NEXT: lbu s2, 14(a0) -; RV64I-NEXT: lbu s3, 15(a0) -; RV64I-NEXT: lbu s4, 16(a0) -; RV64I-NEXT: lbu s5, 17(a0) -; RV64I-NEXT: lbu s6, 18(a0) -; RV64I-NEXT: lbu s7, 19(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: lbu t2, 4(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a3, a4 +; RV64I-NEXT: or a6, a5, a6 +; RV64I-NEXT: slli a7, a7, 8 ; RV64I-NEXT: slli t0, t0, 8 -; RV64I-NEXT: 
or a3, a4, a3 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: lbu s8, 20(a0) -; RV64I-NEXT: lbu s9, 21(a0) -; RV64I-NEXT: lbu s10, 22(a0) -; RV64I-NEXT: lbu s11, 23(a0) -; RV64I-NEXT: slli t2, t2, 8 +; RV64I-NEXT: or a3, a7, t1 +; RV64I-NEXT: lbu a7, 8(a0) +; RV64I-NEXT: lbu t1, 9(a0) +; RV64I-NEXT: lbu t3, 10(a0) +; RV64I-NEXT: lbu t4, 11(a0) +; RV64I-NEXT: lbu t5, 12(a0) +; RV64I-NEXT: lbu t6, 13(a0) +; RV64I-NEXT: lbu s0, 14(a0) +; RV64I-NEXT: lbu s1, 15(a0) +; RV64I-NEXT: or a5, t0, t2 +; RV64I-NEXT: slli a3, a3, 16 +; RV64I-NEXT: or a5, a3, a5 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or a7, t1, a7 ; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: slli t6, t6, 8 +; RV64I-NEXT: or t0, t4, t3 ; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or t1, t2, t1 -; RV64I-NEXT: or a4, t4, t3 -; RV64I-NEXT: or a6, t6, t5 -; RV64I-NEXT: or t0, s1, s0 -; RV64I-NEXT: lbu t5, 24(a0) -; RV64I-NEXT: lbu t6, 25(a0) -; RV64I-NEXT: lbu s0, 26(a0) -; RV64I-NEXT: lbu s1, 27(a0) -; RV64I-NEXT: slli s3, s3, 8 -; RV64I-NEXT: slli s5, s5, 8 -; RV64I-NEXT: slli s7, s7, 8 -; RV64I-NEXT: or t4, s3, s2 -; RV64I-NEXT: or t2, s5, s4 -; RV64I-NEXT: or t3, s7, s6 -; RV64I-NEXT: lbu s2, 28(a0) -; RV64I-NEXT: lbu s3, 29(a0) -; RV64I-NEXT: lbu s4, 30(a0) -; RV64I-NEXT: lbu a0, 31(a0) -; RV64I-NEXT: slli s9, s9, 8 -; RV64I-NEXT: slli s11, s11, 8 ; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s5, s9, s8 -; RV64I-NEXT: or s6, s11, s10 -; RV64I-NEXT: or t5, t6, t5 ; RV64I-NEXT: or s0, s1, s0 -; RV64I-NEXT: lbu t6, 0(a1) -; RV64I-NEXT: lbu s1, 1(a1) -; RV64I-NEXT: lbu s7, 2(a1) -; RV64I-NEXT: lbu s8, 3(a1) -; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: lbu a3, 16(a0) +; RV64I-NEXT: lbu t1, 17(a0) +; RV64I-NEXT: lbu t3, 18(a0) +; RV64I-NEXT: lbu t4, 19(a0) +; RV64I-NEXT: lbu s1, 20(a0) +; RV64I-NEXT: lbu s2, 21(a0) +; RV64I-NEXT: lbu s3, 22(a0) +; RV64I-NEXT: lbu s4, 23(a0) +; RV64I-NEXT: or t2, t6, t5 +; RV64I-NEXT: slli s0, s0, 16 +; RV64I-NEXT: or t2, s0, 
t2 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or a3, t1, a3 +; RV64I-NEXT: slli t4, t4, 8 +; RV64I-NEXT: or t1, t4, t3 +; RV64I-NEXT: slli s4, s4, 8 +; RV64I-NEXT: slli s2, s2, 8 +; RV64I-NEXT: or t3, s4, s3 +; RV64I-NEXT: or t4, s2, s1 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: or t3, t3, t4 +; RV64I-NEXT: lbu t4, 24(a0) +; RV64I-NEXT: lbu t5, 25(a0) +; RV64I-NEXT: lbu t6, 26(a0) +; RV64I-NEXT: lbu s0, 27(a0) +; RV64I-NEXT: or a3, t1, a3 +; RV64I-NEXT: slli t3, t3, 32 +; RV64I-NEXT: lbu t1, 28(a0) +; RV64I-NEXT: lbu s1, 29(a0) +; RV64I-NEXT: lbu s2, 30(a0) +; RV64I-NEXT: lbu a0, 31(a0) +; RV64I-NEXT: or a3, t3, a3 +; RV64I-NEXT: slli s0, s0, 8 +; RV64I-NEXT: slli t5, t5, 8 +; RV64I-NEXT: or t3, s0, t6 +; RV64I-NEXT: or t4, t5, t4 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: or t3, t3, t4 ; RV64I-NEXT: slli a0, a0, 8 ; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s2, s3, s2 -; RV64I-NEXT: or s3, a0, s4 -; RV64I-NEXT: or t6, s1, t6 -; RV64I-NEXT: lbu a0, 4(a1) -; RV64I-NEXT: lbu s1, 5(a1) -; RV64I-NEXT: lbu s4, 6(a1) +; RV64I-NEXT: or a0, a0, s2 +; RV64I-NEXT: or t1, s1, t1 +; RV64I-NEXT: slli a0, a0, 16 +; RV64I-NEXT: lbu t4, 0(a1) +; RV64I-NEXT: lbu t5, 1(a1) +; RV64I-NEXT: lbu t6, 2(a1) +; RV64I-NEXT: lbu s0, 3(a1) +; RV64I-NEXT: lbu s1, 4(a1) +; RV64I-NEXT: lbu s2, 5(a1) +; RV64I-NEXT: lbu s3, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli s8, s8, 8 -; RV64I-NEXT: or s7, s8, s7 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s1, s1, a0 +; RV64I-NEXT: or t1, a0, t1 +; RV64I-NEXT: slli t5, t5, 8 +; RV64I-NEXT: or t4, t5, t4 +; RV64I-NEXT: slli s0, s0, 8 +; RV64I-NEXT: or t5, s0, t6 +; RV64I-NEXT: slli s2, s2, 8 +; RV64I-NEXT: or t6, s2, s1 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or s4, a1, s4 -; RV64I-NEXT: slli t1, t1, 16 -; RV64I-NEXT: or a1, t1, a7 -; RV64I-NEXT: slli t4, t4, 16 -; RV64I-NEXT: or a0, t4, t0 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: or t0, t3, t2 -; RV64I-NEXT: slli s6, s6, 16 -; RV64I-NEXT: or 
t1, s6, s5 +; RV64I-NEXT: or s0, a1, s3 +; RV64I-NEXT: slli a1, a6, 16 +; RV64I-NEXT: slli a0, t0, 16 ; RV64I-NEXT: slli s0, s0, 16 -; RV64I-NEXT: or t3, s0, t5 -; RV64I-NEXT: slli s3, s3, 16 -; RV64I-NEXT: or t5, s3, s2 -; RV64I-NEXT: slli s7, s7, 16 -; RV64I-NEXT: or t6, s7, t6 -; RV64I-NEXT: slli s4, s4, 16 -; RV64I-NEXT: or s0, s4, s1 -; RV64I-NEXT: li a7, 64 -; RV64I-NEXT: slli t4, a5, 16 -; RV64I-NEXT: slli t2, a6, 16 -; RV64I-NEXT: slli t1, t1, 32 -; RV64I-NEXT: slli t5, t5, 32 -; RV64I-NEXT: slli s0, s0, 32 -; RV64I-NEXT: or a6, t1, t0 -; RV64I-NEXT: or t0, t5, t3 -; RV64I-NEXT: or a5, s0, t6 -; RV64I-NEXT: slli a5, a5, 6 -; RV64I-NEXT: sub t1, a5, a7 -; RV64I-NEXT: neg t5, a5 -; RV64I-NEXT: sll t3, t0, t5 -; RV64I-NEXT: bltu a5, a7, .LBB14_2 +; RV64I-NEXT: slli t5, t5, 16 +; RV64I-NEXT: or a6, s0, t6 +; RV64I-NEXT: or t0, t5, t4 +; RV64I-NEXT: slli a6, a6, 32 +; RV64I-NEXT: slli t1, t1, 32 +; RV64I-NEXT: or a6, a6, t0 +; RV64I-NEXT: or t0, t1, t3 +; RV64I-NEXT: slli a6, a6, 6 +; RV64I-NEXT: li t1, 64 +; RV64I-NEXT: neg t3, a6 +; RV64I-NEXT: sub t5, a6, t1 +; RV64I-NEXT: sll t4, t0, t3 +; RV64I-NEXT: bltu a6, t1, .LBB14_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: srl t6, t0, t1 +; RV64I-NEXT: srl t6, t0, t5 ; RV64I-NEXT: j .LBB14_3 ; RV64I-NEXT: .LBB14_2: -; RV64I-NEXT: srl t6, a6, a5 -; RV64I-NEXT: or t6, t6, t3 +; RV64I-NEXT: srl t6, a3, a6 +; RV64I-NEXT: or t6, t6, t4 ; RV64I-NEXT: .LBB14_3: -; RV64I-NEXT: or a3, t4, a3 -; RV64I-NEXT: slli t4, a1, 32 -; RV64I-NEXT: or t2, t2, a4 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: mv a1, a6 -; RV64I-NEXT: beqz a5, .LBB14_5 +; RV64I-NEXT: or a4, a1, a4 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: slli t2, t2, 32 +; RV64I-NEXT: mv a1, a3 +; RV64I-NEXT: beqz a6, .LBB14_5 ; RV64I-NEXT: # %bb.4: ; RV64I-NEXT: mv a1, t6 ; RV64I-NEXT: .LBB14_5: -; RV64I-NEXT: or a4, t4, a3 -; RV64I-NEXT: or a3, a0, t2 -; RV64I-NEXT: bltu a5, a7, .LBB14_7 +; RV64I-NEXT: or a5, a5, a4 +; RV64I-NEXT: or a4, t2, 
a0 +; RV64I-NEXT: bltu a6, t1, .LBB14_7 ; RV64I-NEXT: # %bb.6: ; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: srl t4, a3, t1 +; RV64I-NEXT: srl t5, a4, t5 ; RV64I-NEXT: j .LBB14_8 ; RV64I-NEXT: .LBB14_7: -; RV64I-NEXT: srl a0, t0, a5 -; RV64I-NEXT: srl t1, a4, a5 -; RV64I-NEXT: sll t2, a3, t5 -; RV64I-NEXT: or t4, t1, t2 +; RV64I-NEXT: srl a7, a5, a6 +; RV64I-NEXT: sll t2, a4, t3 +; RV64I-NEXT: srl a0, t0, a6 +; RV64I-NEXT: or t5, a7, t2 ; RV64I-NEXT: .LBB14_8: -; RV64I-NEXT: li t1, 128 -; RV64I-NEXT: mv t2, a4 -; RV64I-NEXT: beqz a5, .LBB14_10 +; RV64I-NEXT: li a7, 128 +; RV64I-NEXT: mv t2, a5 +; RV64I-NEXT: beqz a6, .LBB14_10 ; RV64I-NEXT: # %bb.9: -; RV64I-NEXT: mv t2, t4 +; RV64I-NEXT: mv t2, t5 ; RV64I-NEXT: .LBB14_10: -; RV64I-NEXT: sub t6, t1, a5 -; RV64I-NEXT: bltu a5, a7, .LBB14_13 +; RV64I-NEXT: sub t6, a7, a6 +; RV64I-NEXT: bltu a6, t1, .LBB14_13 ; RV64I-NEXT: # %bb.11: -; RV64I-NEXT: li t4, 0 -; RV64I-NEXT: bgeu t6, a7, .LBB14_14 +; RV64I-NEXT: li t5, 0 +; RV64I-NEXT: bgeu t6, t1, .LBB14_14 ; RV64I-NEXT: .LBB14_12: -; RV64I-NEXT: sll t5, a6, t5 ; RV64I-NEXT: neg s0, t6 -; RV64I-NEXT: srl s0, a6, s0 -; RV64I-NEXT: or s1, s0, t3 +; RV64I-NEXT: srl s0, a3, s0 +; RV64I-NEXT: sll t3, a3, t3 +; RV64I-NEXT: or s1, s0, t4 ; RV64I-NEXT: j .LBB14_15 ; RV64I-NEXT: .LBB14_13: -; RV64I-NEXT: srl t4, a3, a5 -; RV64I-NEXT: bltu t6, a7, .LBB14_12 +; RV64I-NEXT: srl t5, a4, a6 +; RV64I-NEXT: bltu t6, t1, .LBB14_12 ; RV64I-NEXT: .LBB14_14: -; RV64I-NEXT: li t5, 0 -; RV64I-NEXT: sub t3, t6, a7 -; RV64I-NEXT: sll s1, a6, t3 +; RV64I-NEXT: li t3, 0 +; RV64I-NEXT: sub t4, t6, t1 +; RV64I-NEXT: sll s1, a3, t4 ; RV64I-NEXT: .LBB14_15: -; RV64I-NEXT: sub s0, a5, t1 -; RV64I-NEXT: mv t3, t0 +; RV64I-NEXT: sub s0, a6, a7 +; RV64I-NEXT: mv t4, t0 ; RV64I-NEXT: beqz t6, .LBB14_17 ; RV64I-NEXT: # %bb.16: -; RV64I-NEXT: mv t3, s1 +; RV64I-NEXT: mv t4, s1 ; RV64I-NEXT: .LBB14_17: -; RV64I-NEXT: bltu s0, a7, .LBB14_19 +; RV64I-NEXT: bltu s0, t1, .LBB14_19 ; RV64I-NEXT: # %bb.18: -; RV64I-NEXT: 
sub t6, s0, a7 +; RV64I-NEXT: sub t6, s0, t1 ; RV64I-NEXT: srl t6, t0, t6 ; RV64I-NEXT: bnez s0, .LBB14_20 ; RV64I-NEXT: j .LBB14_21 ; RV64I-NEXT: .LBB14_19: -; RV64I-NEXT: srl t6, a6, s0 -; RV64I-NEXT: neg s1, s0 -; RV64I-NEXT: sll s1, t0, s1 -; RV64I-NEXT: or t6, t6, s1 +; RV64I-NEXT: neg t6, s0 +; RV64I-NEXT: srl s1, a3, s0 +; RV64I-NEXT: sll t6, t0, t6 +; RV64I-NEXT: or t6, s1, t6 ; RV64I-NEXT: beqz s0, .LBB14_21 ; RV64I-NEXT: .LBB14_20: -; RV64I-NEXT: mv a6, t6 +; RV64I-NEXT: mv a3, t6 ; RV64I-NEXT: .LBB14_21: -; RV64I-NEXT: bltu s0, a7, .LBB14_23 +; RV64I-NEXT: bltu s0, t1, .LBB14_23 ; RV64I-NEXT: # %bb.22: -; RV64I-NEXT: li a7, 0 -; RV64I-NEXT: bltu a5, t1, .LBB14_24 +; RV64I-NEXT: li t0, 0 +; RV64I-NEXT: bltu a6, a7, .LBB14_24 ; RV64I-NEXT: j .LBB14_25 ; RV64I-NEXT: .LBB14_23: -; RV64I-NEXT: srl a7, t0, s0 -; RV64I-NEXT: bgeu a5, t1, .LBB14_25 +; RV64I-NEXT: srl t0, t0, s0 +; RV64I-NEXT: bgeu a6, a7, .LBB14_25 ; RV64I-NEXT: .LBB14_24: -; RV64I-NEXT: or a6, t2, t5 -; RV64I-NEXT: or a7, t4, t3 +; RV64I-NEXT: or a3, t2, t3 +; RV64I-NEXT: or t0, t5, t4 ; RV64I-NEXT: .LBB14_25: -; RV64I-NEXT: bnez a5, .LBB14_29 +; RV64I-NEXT: bnez a6, .LBB14_29 ; RV64I-NEXT: # %bb.26: -; RV64I-NEXT: bltu a5, t1, .LBB14_28 +; RV64I-NEXT: bltu a6, a7, .LBB14_28 ; RV64I-NEXT: .LBB14_27: ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: li a0, 0 ; RV64I-NEXT: .LBB14_28: +; RV64I-NEXT: srli a6, a5, 32 +; RV64I-NEXT: lui a3, 16 +; RV64I-NEXT: srliw a7, a5, 16 +; RV64I-NEXT: addi a3, a3, -1 +; RV64I-NEXT: srliw t0, a5, 24 +; RV64I-NEXT: and t1, a5, a3 +; RV64I-NEXT: srli t1, t1, 8 +; RV64I-NEXT: sb a5, 0(a2) +; RV64I-NEXT: sb t1, 1(a2) +; RV64I-NEXT: sb a7, 2(a2) +; RV64I-NEXT: sb t0, 3(a2) +; RV64I-NEXT: and a7, a6, a3 +; RV64I-NEXT: srli t0, a5, 48 +; RV64I-NEXT: srli a7, a7, 8 +; RV64I-NEXT: srli a5, a5, 56 +; RV64I-NEXT: sb a6, 4(a2) +; RV64I-NEXT: sb a7, 5(a2) +; RV64I-NEXT: sb t0, 6(a2) +; RV64I-NEXT: sb a5, 7(a2) ; RV64I-NEXT: srli a5, a4, 32 ; RV64I-NEXT: srliw a6, a4, 16 -; RV64I-NEXT: lui 
t2, 16 -; RV64I-NEXT: srliw t1, a4, 24 -; RV64I-NEXT: srli t0, a4, 48 -; RV64I-NEXT: srli t5, a4, 56 -; RV64I-NEXT: srli a7, a3, 32 -; RV64I-NEXT: srliw t4, a3, 16 -; RV64I-NEXT: srliw s0, a3, 24 -; RV64I-NEXT: srli t6, a3, 48 -; RV64I-NEXT: srli s3, a3, 56 -; RV64I-NEXT: srli t3, a1, 32 -; RV64I-NEXT: srliw s2, a1, 16 -; RV64I-NEXT: srliw s6, a1, 24 -; RV64I-NEXT: srli s4, a1, 48 -; RV64I-NEXT: srli s7, a1, 56 -; RV64I-NEXT: srli s1, a0, 32 -; RV64I-NEXT: srliw s5, a0, 16 -; RV64I-NEXT: srliw s8, a0, 24 -; RV64I-NEXT: srli s9, a0, 48 -; RV64I-NEXT: srli s10, a0, 56 -; RV64I-NEXT: addi t2, t2, -1 -; RV64I-NEXT: and s11, a4, t2 -; RV64I-NEXT: srli s11, s11, 8 -; RV64I-NEXT: sb a4, 0(a2) -; RV64I-NEXT: sb s11, 1(a2) -; RV64I-NEXT: sb a6, 2(a2) -; RV64I-NEXT: sb t1, 3(a2) -; RV64I-NEXT: and a4, a5, t2 -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a5, 4(a2) -; RV64I-NEXT: sb a4, 5(a2) -; RV64I-NEXT: sb t0, 6(a2) -; RV64I-NEXT: sb t5, 7(a2) -; RV64I-NEXT: and a4, a3, t2 -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a3, 8(a2) -; RV64I-NEXT: sb a4, 9(a2) -; RV64I-NEXT: sb t4, 10(a2) -; RV64I-NEXT: sb s0, 11(a2) -; RV64I-NEXT: and a3, a7, t2 -; RV64I-NEXT: srli a3, a3, 8 -; RV64I-NEXT: sb a7, 12(a2) -; RV64I-NEXT: sb a3, 13(a2) -; RV64I-NEXT: sb t6, 14(a2) -; RV64I-NEXT: sb s3, 15(a2) -; RV64I-NEXT: and a3, a1, t2 -; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: and a7, a4, a3 +; RV64I-NEXT: srli a7, a7, 8 +; RV64I-NEXT: srliw t0, a4, 24 +; RV64I-NEXT: sb a4, 8(a2) +; RV64I-NEXT: sb a7, 9(a2) +; RV64I-NEXT: sb a6, 10(a2) +; RV64I-NEXT: sb t0, 11(a2) +; RV64I-NEXT: srli a6, a4, 48 +; RV64I-NEXT: and a7, a5, a3 +; RV64I-NEXT: srli a7, a7, 8 +; RV64I-NEXT: srli a4, a4, 56 +; RV64I-NEXT: sb a5, 12(a2) +; RV64I-NEXT: sb a7, 13(a2) +; RV64I-NEXT: sb a6, 14(a2) +; RV64I-NEXT: sb a4, 15(a2) +; RV64I-NEXT: srli a4, a1, 32 +; RV64I-NEXT: and a5, a1, a3 +; RV64I-NEXT: srliw a6, a1, 16 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srliw a7, a1, 24 ; RV64I-NEXT: sb a1, 16(a2) -; 
RV64I-NEXT: sb a3, 17(a2) -; RV64I-NEXT: sb s2, 18(a2) -; RV64I-NEXT: sb s6, 19(a2) -; RV64I-NEXT: and a1, t3, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb t3, 20(a2) -; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: sb s4, 22(a2) -; RV64I-NEXT: sb s7, 23(a2) -; RV64I-NEXT: and a1, a0, t2 -; RV64I-NEXT: and a3, s1, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: sb a5, 17(a2) +; RV64I-NEXT: sb a6, 18(a2) +; RV64I-NEXT: sb a7, 19(a2) +; RV64I-NEXT: and a5, a4, a3 +; RV64I-NEXT: srli a6, a1, 48 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srli a1, a1, 56 +; RV64I-NEXT: sb a4, 20(a2) +; RV64I-NEXT: sb a5, 21(a2) +; RV64I-NEXT: sb a6, 22(a2) +; RV64I-NEXT: sb a1, 23(a2) +; RV64I-NEXT: srli a1, a0, 32 +; RV64I-NEXT: srliw a4, a0, 16 +; RV64I-NEXT: and a5, a0, a3 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srliw a6, a0, 24 ; RV64I-NEXT: sb a0, 24(a2) -; RV64I-NEXT: sb a1, 25(a2) -; RV64I-NEXT: sb s5, 26(a2) -; RV64I-NEXT: sb s8, 27(a2) -; RV64I-NEXT: sb s1, 28(a2) +; RV64I-NEXT: sb a5, 25(a2) +; RV64I-NEXT: sb a4, 26(a2) +; RV64I-NEXT: sb a6, 27(a2) +; RV64I-NEXT: srli a4, a0, 48 +; RV64I-NEXT: and a3, a1, a3 +; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: sb a1, 28(a2) ; RV64I-NEXT: sb a3, 29(a2) -; RV64I-NEXT: sb s9, 30(a2) -; RV64I-NEXT: sb s10, 31(a2) -; RV64I-NEXT: ld s0, 88(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 80(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 72(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 64(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 56(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s5, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s6, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s7, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s8, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s9, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s10, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s11, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 96 +; RV64I-NEXT: 
sb a4, 30(a2) +; RV64I-NEXT: sb a0, 31(a2) +; RV64I-NEXT: ld s0, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB14_29: -; RV64I-NEXT: mv a4, a6 -; RV64I-NEXT: mv a3, a7 -; RV64I-NEXT: bgeu a5, t1, .LBB14_27 +; RV64I-NEXT: mv a5, a3 +; RV64I-NEXT: mv a4, t0 +; RV64I-NEXT: bgeu a6, a7, .LBB14_27 ; RV64I-NEXT: j .LBB14_28 ; ; RV32I-LABEL: lshr_32bytes_dwordOff: @@ -5300,749 +5234,758 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: sw s9, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s10, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s11, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: lbu a5, 1(a0) -; RV32I-NEXT: lbu a6, 2(a0) -; RV32I-NEXT: lbu a7, 3(a0) -; RV32I-NEXT: lbu t0, 4(a0) -; RV32I-NEXT: lbu t1, 5(a0) -; RV32I-NEXT: lbu t2, 6(a0) -; RV32I-NEXT: lbu t3, 7(a0) -; RV32I-NEXT: lbu t4, 8(a0) -; RV32I-NEXT: lbu t5, 9(a0) +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 3(a0) +; RV32I-NEXT: lbu a5, 0(a0) +; RV32I-NEXT: lbu a7, 2(a0) +; RV32I-NEXT: lbu t0, 5(a0) +; RV32I-NEXT: lbu t1, 7(a0) +; RV32I-NEXT: lbu t2, 4(a0) +; RV32I-NEXT: lbu t3, 6(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a3, a3, a5 +; RV32I-NEXT: or a5, a4, a7 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or a4, t0, t2 +; RV32I-NEXT: or a7, t1, t3 +; RV32I-NEXT: lbu t0, 9(a0) +; RV32I-NEXT: lbu t3, 0(a1) +; RV32I-NEXT: lbu t4, 1(a1) +; RV32I-NEXT: lbu t1, 2(a1) +; RV32I-NEXT: lbu t2, 3(a1) +; RV32I-NEXT: lbu t5, 8(a0) ; RV32I-NEXT: lbu t6, 10(a0) ; RV32I-NEXT: lbu s0, 11(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: slli t1, 
t1, 8 -; RV32I-NEXT: or a3, a5, a3 -; RV32I-NEXT: or a7, a7, a6 -; RV32I-NEXT: or t1, t1, t0 -; RV32I-NEXT: lbu a6, 13(a0) -; RV32I-NEXT: lbu a5, 14(a0) -; RV32I-NEXT: lbu s1, 15(a0) -; RV32I-NEXT: slli t3, t3, 8 -; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: or t3, t3, t2 -; RV32I-NEXT: or t0, t5, t4 -; RV32I-NEXT: or t5, s0, t6 -; RV32I-NEXT: lbu t2, 1(a1) -; RV32I-NEXT: lbu t4, 0(a1) -; RV32I-NEXT: lbu t6, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a1, a7, 16 +; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or t0, t0, t5 ; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or s0, t2, t4 -; RV32I-NEXT: slli t2, s1, 8 -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a1, t6 -; RV32I-NEXT: slli t4, a7, 16 -; RV32I-NEXT: slli a7, t3, 16 -; RV32I-NEXT: slli t3, t5, 16 -; RV32I-NEXT: slli t5, a1, 16 -; RV32I-NEXT: or a1, a7, t1 -; RV32I-NEXT: or a7, t5, s0 -; RV32I-NEXT: slli a7, a7, 6 -; RV32I-NEXT: srli t1, a7, 5 -; RV32I-NEXT: andi t5, a7, 31 +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: or a4, t2, t1 +; RV32I-NEXT: lbu t2, 13(a0) +; RV32I-NEXT: lbu t1, 14(a0) +; RV32I-NEXT: lbu a7, 15(a0) +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: or t3, t4, t3 +; RV32I-NEXT: slli a4, a4, 16 +; RV32I-NEXT: or t4, s0, t6 +; RV32I-NEXT: or a4, a4, t3 +; RV32I-NEXT: slli t3, t4, 16 +; RV32I-NEXT: slli a4, a4, 6 +; RV32I-NEXT: slli s0, a7, 8 +; RV32I-NEXT: andi t5, a4, 31 +; RV32I-NEXT: srli a7, a4, 5 ; RV32I-NEXT: neg s3, t5 ; RV32I-NEXT: beqz t5, .LBB14_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sll a4, a1, s3 +; RV32I-NEXT: sll a6, a1, s3 ; RV32I-NEXT: .LBB14_2: -; RV32I-NEXT: or s7, t4, a3 ; RV32I-NEXT: lbu t4, 12(a0) ; RV32I-NEXT: lbu t6, 19(a0) -; RV32I-NEXT: slli s1, a6, 8 -; RV32I-NEXT: or a5, t2, a5 +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t1, s0, t1 ; RV32I-NEXT: or a3, t3, t0 -; RV32I-NEXT: beqz t1, .LBB14_4 +; RV32I-NEXT: sw a5, 4(sp) # 4-byte Folded 
Spill +; RV32I-NEXT: beqz a7, .LBB14_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: li s0, 0 ; RV32I-NEXT: j .LBB14_5 ; RV32I-NEXT: .LBB14_4: -; RV32I-NEXT: srl s0, s7, a7 -; RV32I-NEXT: or s0, s0, a4 +; RV32I-NEXT: srl t0, a5, a4 +; RV32I-NEXT: or s0, t0, a6 ; RV32I-NEXT: .LBB14_5: -; RV32I-NEXT: li a6, 0 -; RV32I-NEXT: lbu t0, 17(a0) -; RV32I-NEXT: lbu a4, 18(a0) -; RV32I-NEXT: slli s4, t6, 8 -; RV32I-NEXT: or s2, s1, t4 -; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: lbu s2, 17(a0) +; RV32I-NEXT: lbu a6, 18(a0) +; RV32I-NEXT: slli s1, t6, 8 +; RV32I-NEXT: or s4, t2, t4 +; RV32I-NEXT: slli t1, t1, 16 ; RV32I-NEXT: li s5, 1 ; RV32I-NEXT: sll t6, a3, s3 ; RV32I-NEXT: beqz t5, .LBB14_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: mv a6, t6 +; RV32I-NEXT: mv t0, t6 ; RV32I-NEXT: .LBB14_7: ; RV32I-NEXT: lbu t2, 16(a0) ; RV32I-NEXT: lbu t3, 23(a0) -; RV32I-NEXT: slli s1, t0, 8 -; RV32I-NEXT: or t4, s4, a4 -; RV32I-NEXT: srl a4, a1, a7 -; RV32I-NEXT: or a5, a5, s2 -; RV32I-NEXT: bne t1, s5, .LBB14_9 +; RV32I-NEXT: slli s2, s2, 8 +; RV32I-NEXT: or t4, s1, a6 +; RV32I-NEXT: srl s1, a1, a4 +; RV32I-NEXT: or a6, t1, s4 +; RV32I-NEXT: bne a7, s5, .LBB14_9 ; RV32I-NEXT: # %bb.8: -; RV32I-NEXT: or s0, a4, a6 +; RV32I-NEXT: or s0, s1, t0 ; RV32I-NEXT: .LBB14_9: -; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: lbu s5, 21(a0) -; RV32I-NEXT: lbu a6, 22(a0) -; RV32I-NEXT: slli s4, t3, 8 -; RV32I-NEXT: or t2, s1, t2 -; RV32I-NEXT: slli s6, t4, 16 +; RV32I-NEXT: li t1, 0 +; RV32I-NEXT: lbu s6, 21(a0) +; RV32I-NEXT: lbu t0, 22(a0) +; RV32I-NEXT: slli s5, t3, 8 +; RV32I-NEXT: or t2, s2, t2 +; RV32I-NEXT: slli s7, t4, 16 ; RV32I-NEXT: li s8, 2 -; RV32I-NEXT: sll t3, a5, s3 +; RV32I-NEXT: sll t3, a6, s3 ; RV32I-NEXT: beqz t5, .LBB14_11 ; RV32I-NEXT: # %bb.10: -; RV32I-NEXT: mv t0, t3 +; RV32I-NEXT: mv t1, t3 ; RV32I-NEXT: .LBB14_11: -; RV32I-NEXT: lbu s1, 20(a0) -; RV32I-NEXT: lbu s2, 27(a0) -; RV32I-NEXT: slli s5, s5, 8 -; RV32I-NEXT: or s4, s4, a6 -; RV32I-NEXT: srl t4, a3, a7 -; 
RV32I-NEXT: or a6, s6, t2 -; RV32I-NEXT: bne t1, s8, .LBB14_13 +; RV32I-NEXT: lbu s2, 20(a0) +; RV32I-NEXT: lbu s4, 27(a0) +; RV32I-NEXT: slli s6, s6, 8 +; RV32I-NEXT: or s5, s5, t0 +; RV32I-NEXT: srl t4, a3, a4 +; RV32I-NEXT: or t0, s7, t2 +; RV32I-NEXT: bne a7, s8, .LBB14_13 ; RV32I-NEXT: # %bb.12: -; RV32I-NEXT: or s0, t4, t0 +; RV32I-NEXT: or s0, t4, t1 ; RV32I-NEXT: .LBB14_13: -; RV32I-NEXT: sw s7, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: li t2, 0 -; RV32I-NEXT: lbu s6, 25(a0) -; RV32I-NEXT: lbu t0, 26(a0) -; RV32I-NEXT: slli s8, s2, 8 -; RV32I-NEXT: or s7, s5, s1 -; RV32I-NEXT: slli s9, s4, 16 -; RV32I-NEXT: sll s11, a6, s3 +; RV32I-NEXT: lbu s7, 25(a0) +; RV32I-NEXT: lbu t1, 26(a0) +; RV32I-NEXT: slli s9, s4, 8 +; RV32I-NEXT: or s8, s6, s2 +; RV32I-NEXT: slli s10, s5, 16 +; RV32I-NEXT: sll s11, t0, s3 ; RV32I-NEXT: beqz t5, .LBB14_15 ; RV32I-NEXT: # %bb.14: ; RV32I-NEXT: mv t2, s11 ; RV32I-NEXT: .LBB14_15: -; RV32I-NEXT: lbu s1, 24(a0) -; RV32I-NEXT: lbu s2, 31(a0) -; RV32I-NEXT: slli s5, s6, 8 -; RV32I-NEXT: or s4, s8, t0 -; RV32I-NEXT: srl ra, a5, a7 -; RV32I-NEXT: or t0, s9, s7 -; RV32I-NEXT: li s6, 3 -; RV32I-NEXT: bne t1, s6, .LBB14_17 +; RV32I-NEXT: lbu s2, 24(a0) +; RV32I-NEXT: lbu s4, 31(a0) +; RV32I-NEXT: slli s6, s7, 8 +; RV32I-NEXT: or s5, s9, t1 +; RV32I-NEXT: srl ra, a6, a4 +; RV32I-NEXT: or t1, s10, s8 +; RV32I-NEXT: li a5, 3 +; RV32I-NEXT: bne a7, a5, .LBB14_17 ; RV32I-NEXT: # %bb.16: ; RV32I-NEXT: or s0, ra, t2 ; RV32I-NEXT: .LBB14_17: ; RV32I-NEXT: li t2, 0 -; RV32I-NEXT: lbu s7, 29(a0) -; RV32I-NEXT: lbu s6, 30(a0) -; RV32I-NEXT: slli s8, s2, 8 -; RV32I-NEXT: or s2, s5, s1 -; RV32I-NEXT: slli s5, s4, 16 -; RV32I-NEXT: li s9, 4 -; RV32I-NEXT: sll s1, t0, s3 -; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu s8, 29(a0) +; RV32I-NEXT: lbu s7, 30(a0) +; RV32I-NEXT: slli s10, s4, 8 +; RV32I-NEXT: or s4, s6, s2 +; RV32I-NEXT: slli s6, s5, 16 +; RV32I-NEXT: li a5, 4 +; RV32I-NEXT: sll s2, t1, s3 +; RV32I-NEXT: sw s2, 8(sp) # 
4-byte Folded Spill ; RV32I-NEXT: beqz t5, .LBB14_19 ; RV32I-NEXT: # %bb.18: ; RV32I-NEXT: lw t2, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB14_19: -; RV32I-NEXT: lbu s1, 28(a0) -; RV32I-NEXT: slli s7, s7, 8 -; RV32I-NEXT: or s4, s8, s6 -; RV32I-NEXT: srl s10, a6, a7 -; RV32I-NEXT: or a0, s5, s2 -; RV32I-NEXT: bne t1, s9, .LBB14_21 +; RV32I-NEXT: lbu s2, 28(a0) +; RV32I-NEXT: slli s8, s8, 8 +; RV32I-NEXT: or s5, s10, s7 +; RV32I-NEXT: srl s10, t0, a4 +; RV32I-NEXT: or a0, s6, s4 +; RV32I-NEXT: bne a7, a5, .LBB14_21 ; RV32I-NEXT: # %bb.20: ; RV32I-NEXT: or s0, s10, t2 ; RV32I-NEXT: .LBB14_21: -; RV32I-NEXT: li s2, 0 -; RV32I-NEXT: or t2, s7, s1 -; RV32I-NEXT: slli s4, s4, 16 -; RV32I-NEXT: li s9, 5 +; RV32I-NEXT: li s4, 0 +; RV32I-NEXT: or t2, s8, s2 +; RV32I-NEXT: slli s5, s5, 16 +; RV32I-NEXT: li s8, 5 ; RV32I-NEXT: sll s7, a0, s3 ; RV32I-NEXT: beqz t5, .LBB14_23 ; RV32I-NEXT: # %bb.22: -; RV32I-NEXT: mv s2, s7 +; RV32I-NEXT: mv s4, s7 ; RV32I-NEXT: .LBB14_23: -; RV32I-NEXT: srl s8, t0, a7 -; RV32I-NEXT: or t2, s4, t2 -; RV32I-NEXT: bne t1, s9, .LBB14_25 +; RV32I-NEXT: srl a5, t1, a4 +; RV32I-NEXT: or t2, s5, t2 +; RV32I-NEXT: beq a7, s8, .LBB14_25 ; RV32I-NEXT: # %bb.24: -; RV32I-NEXT: or s0, s8, s2 +; RV32I-NEXT: mv s9, a5 +; RV32I-NEXT: j .LBB14_26 ; RV32I-NEXT: .LBB14_25: +; RV32I-NEXT: mv s9, a5 +; RV32I-NEXT: or s0, a5, s4 +; RV32I-NEXT: .LBB14_26: ; RV32I-NEXT: li s4, 0 -; RV32I-NEXT: li s2, 6 +; RV32I-NEXT: li a5, 6 ; RV32I-NEXT: sll s5, t2, s3 -; RV32I-NEXT: beqz t5, .LBB14_27 -; RV32I-NEXT: # %bb.26: +; RV32I-NEXT: beqz t5, .LBB14_28 +; RV32I-NEXT: # %bb.27: ; RV32I-NEXT: mv s4, s5 -; RV32I-NEXT: .LBB14_27: -; RV32I-NEXT: srl s6, a0, a7 -; RV32I-NEXT: bne t1, s2, .LBB14_29 -; RV32I-NEXT: # %bb.28: +; RV32I-NEXT: .LBB14_28: +; RV32I-NEXT: srl s6, a0, a4 +; RV32I-NEXT: bne a7, a5, .LBB14_30 +; RV32I-NEXT: # %bb.29: ; RV32I-NEXT: or s0, s6, s4 -; RV32I-NEXT: .LBB14_29: +; RV32I-NEXT: .LBB14_30: ; RV32I-NEXT: li s3, 7 -; RV32I-NEXT: srl s1, t2, a7 -; 
RV32I-NEXT: mv s4, s1 -; RV32I-NEXT: bne t1, s3, .LBB14_34 -; RV32I-NEXT: # %bb.30: -; RV32I-NEXT: bnez a7, .LBB14_35 -; RV32I-NEXT: .LBB14_31: -; RV32I-NEXT: li s0, 0 -; RV32I-NEXT: bnez t5, .LBB14_36 +; RV32I-NEXT: srl s2, t2, a4 +; RV32I-NEXT: mv s4, s2 +; RV32I-NEXT: bne a7, s3, .LBB14_35 +; RV32I-NEXT: # %bb.31: +; RV32I-NEXT: bnez a4, .LBB14_36 ; RV32I-NEXT: .LBB14_32: -; RV32I-NEXT: li s4, 2 -; RV32I-NEXT: beqz t1, .LBB14_37 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t5, .LBB14_37 ; RV32I-NEXT: .LBB14_33: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: j .LBB14_38 +; RV32I-NEXT: li s4, 2 +; RV32I-NEXT: beqz a7, .LBB14_38 ; RV32I-NEXT: .LBB14_34: -; RV32I-NEXT: mv s4, s0 -; RV32I-NEXT: beqz a7, .LBB14_31 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: j .LBB14_39 ; RV32I-NEXT: .LBB14_35: +; RV32I-NEXT: mv s4, s0 +; RV32I-NEXT: beqz a4, .LBB14_32 +; RV32I-NEXT: .LBB14_36: ; RV32I-NEXT: sw s4, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: li s0, 0 -; RV32I-NEXT: beqz t5, .LBB14_32 -; RV32I-NEXT: .LBB14_36: +; RV32I-NEXT: beqz t5, .LBB14_33 +; RV32I-NEXT: .LBB14_37: ; RV32I-NEXT: mv s0, t6 ; RV32I-NEXT: li s4, 2 -; RV32I-NEXT: bnez t1, .LBB14_33 -; RV32I-NEXT: .LBB14_37: -; RV32I-NEXT: or a4, a4, s0 +; RV32I-NEXT: bnez a7, .LBB14_34 ; RV32I-NEXT: .LBB14_38: -; RV32I-NEXT: li s0, 1 -; RV32I-NEXT: li t6, 0 -; RV32I-NEXT: bnez t5, .LBB14_57 -; RV32I-NEXT: # %bb.39: -; RV32I-NEXT: beq t1, s0, .LBB14_58 -; RV32I-NEXT: .LBB14_40: -; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: or t6, s1, s0 +; RV32I-NEXT: .LBB14_39: +; RV32I-NEXT: li s1, 1 +; RV32I-NEXT: li s0, 0 ; RV32I-NEXT: bnez t5, .LBB14_59 +; RV32I-NEXT: # %bb.40: +; RV32I-NEXT: beq a7, s1, .LBB14_60 ; RV32I-NEXT: .LBB14_41: -; RV32I-NEXT: beq t1, s4, .LBB14_60 -; RV32I-NEXT: .LBB14_42: -; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: li s0, 0 ; RV32I-NEXT: bnez t5, .LBB14_61 +; RV32I-NEXT: .LBB14_42: +; RV32I-NEXT: beq a7, s4, .LBB14_62 ; RV32I-NEXT: .LBB14_43: -; RV32I-NEXT: li s4, 3 -; RV32I-NEXT: bne t1, s4, .LBB14_45 +; RV32I-NEXT: li s0, 0 +; 
RV32I-NEXT: bnez t5, .LBB14_63 ; RV32I-NEXT: .LBB14_44: -; RV32I-NEXT: or a4, s10, t6 +; RV32I-NEXT: li a5, 3 +; RV32I-NEXT: bne a7, a5, .LBB14_46 ; RV32I-NEXT: .LBB14_45: -; RV32I-NEXT: li t6, 0 -; RV32I-NEXT: li s4, 4 -; RV32I-NEXT: bnez t5, .LBB14_62 -; RV32I-NEXT: # %bb.46: -; RV32I-NEXT: beq t1, s4, .LBB14_63 -; RV32I-NEXT: .LBB14_47: -; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: or t6, s10, s0 +; RV32I-NEXT: .LBB14_46: +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: li a5, 4 ; RV32I-NEXT: bnez t5, .LBB14_64 +; RV32I-NEXT: # %bb.47: +; RV32I-NEXT: beq a7, a5, .LBB14_65 ; RV32I-NEXT: .LBB14_48: -; RV32I-NEXT: beq t1, s9, .LBB14_65 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t5, .LBB14_66 ; RV32I-NEXT: .LBB14_49: -; RV32I-NEXT: mv t6, s1 -; RV32I-NEXT: bne t1, s2, .LBB14_66 +; RV32I-NEXT: bne a7, s8, .LBB14_51 ; RV32I-NEXT: .LBB14_50: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s3, .LBB14_67 +; RV32I-NEXT: or t6, s6, s0 ; RV32I-NEXT: .LBB14_51: -; RV32I-NEXT: beqz a7, .LBB14_53 -; RV32I-NEXT: .LBB14_52: -; RV32I-NEXT: mv a1, a4 +; RV32I-NEXT: mv s0, s2 +; RV32I-NEXT: li s8, 6 +; RV32I-NEXT: bne a7, s8, .LBB14_67 +; RV32I-NEXT: # %bb.52: +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: bne a7, s3, .LBB14_68 ; RV32I-NEXT: .LBB14_53: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: li t6, 2 -; RV32I-NEXT: beqz t5, .LBB14_55 -; RV32I-NEXT: # %bb.54: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: beqz a4, .LBB14_55 +; RV32I-NEXT: .LBB14_54: +; RV32I-NEXT: mv a1, t6 ; RV32I-NEXT: .LBB14_55: -; RV32I-NEXT: beqz t1, .LBB14_68 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: li s0, 5 +; RV32I-NEXT: beqz t5, .LBB14_57 ; RV32I-NEXT: # %bb.56: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: j .LBB14_69 -; RV32I-NEXT: .LBB14_57: ; RV32I-NEXT: mv t6, t3 -; RV32I-NEXT: bne t1, s0, .LBB14_40 -; RV32I-NEXT: .LBB14_58: -; RV32I-NEXT: or a4, t4, t6 -; RV32I-NEXT: li t6, 0 -; RV32I-NEXT: beqz t5, .LBB14_41 +; RV32I-NEXT: .LBB14_57: +; RV32I-NEXT: beqz a7, .LBB14_69 +; RV32I-NEXT: # %bb.58: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: j .LBB14_70 
; RV32I-NEXT: .LBB14_59: -; RV32I-NEXT: mv t6, s11 -; RV32I-NEXT: bne t1, s4, .LBB14_42 +; RV32I-NEXT: mv s0, t3 +; RV32I-NEXT: bne a7, s1, .LBB14_41 ; RV32I-NEXT: .LBB14_60: -; RV32I-NEXT: or a4, ra, t6 -; RV32I-NEXT: li t6, 0 -; RV32I-NEXT: beqz t5, .LBB14_43 +; RV32I-NEXT: or t6, t4, s0 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t5, .LBB14_42 ; RV32I-NEXT: .LBB14_61: -; RV32I-NEXT: lw t6, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: li s4, 3 -; RV32I-NEXT: beq t1, s4, .LBB14_44 -; RV32I-NEXT: j .LBB14_45 +; RV32I-NEXT: mv s0, s11 +; RV32I-NEXT: bne a7, s4, .LBB14_43 ; RV32I-NEXT: .LBB14_62: -; RV32I-NEXT: mv t6, s7 -; RV32I-NEXT: bne t1, s4, .LBB14_47 +; RV32I-NEXT: or t6, ra, s0 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t5, .LBB14_44 ; RV32I-NEXT: .LBB14_63: -; RV32I-NEXT: or a4, s8, t6 -; RV32I-NEXT: li t6, 0 -; RV32I-NEXT: beqz t5, .LBB14_48 +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: li a5, 3 +; RV32I-NEXT: beq a7, a5, .LBB14_45 +; RV32I-NEXT: j .LBB14_46 ; RV32I-NEXT: .LBB14_64: -; RV32I-NEXT: mv t6, s5 -; RV32I-NEXT: bne t1, s9, .LBB14_49 +; RV32I-NEXT: mv s0, s7 +; RV32I-NEXT: bne a7, a5, .LBB14_48 ; RV32I-NEXT: .LBB14_65: -; RV32I-NEXT: or a4, s6, t6 -; RV32I-NEXT: mv t6, s1 -; RV32I-NEXT: beq t1, s2, .LBB14_50 +; RV32I-NEXT: or t6, s9, s0 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t5, .LBB14_49 ; RV32I-NEXT: .LBB14_66: -; RV32I-NEXT: mv t6, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s3, .LBB14_51 +; RV32I-NEXT: mv s0, s5 +; RV32I-NEXT: beq a7, s8, .LBB14_50 +; RV32I-NEXT: j .LBB14_51 ; RV32I-NEXT: .LBB14_67: -; RV32I-NEXT: mv a4, t6 -; RV32I-NEXT: bnez a7, .LBB14_52 -; RV32I-NEXT: j .LBB14_53 +; RV32I-NEXT: mv s0, t6 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: beq a7, s3, .LBB14_53 ; RV32I-NEXT: .LBB14_68: -; RV32I-NEXT: or a4, t4, a4 +; RV32I-NEXT: mv t6, s0 +; RV32I-NEXT: bnez a4, .LBB14_54 +; RV32I-NEXT: j .LBB14_55 ; RV32I-NEXT: .LBB14_69: -; RV32I-NEXT: li t4, 3 -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bnez t5, .LBB14_84 -; 
RV32I-NEXT: # %bb.70: -; RV32I-NEXT: beq t1, s0, .LBB14_85 -; RV32I-NEXT: .LBB14_71: -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bnez t5, .LBB14_86 +; RV32I-NEXT: or t3, t4, t6 +; RV32I-NEXT: .LBB14_70: +; RV32I-NEXT: li t6, 3 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bnez t5, .LBB14_85 +; RV32I-NEXT: # %bb.71: +; RV32I-NEXT: beq a7, s1, .LBB14_86 ; RV32I-NEXT: .LBB14_72: -; RV32I-NEXT: beq t1, t6, .LBB14_87 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bnez t5, .LBB14_87 ; RV32I-NEXT: .LBB14_73: -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bnez t5, .LBB14_88 +; RV32I-NEXT: beq a7, s4, .LBB14_88 ; RV32I-NEXT: .LBB14_74: -; RV32I-NEXT: beq t1, t4, .LBB14_89 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bnez t5, .LBB14_89 ; RV32I-NEXT: .LBB14_75: -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bnez t5, .LBB14_90 +; RV32I-NEXT: beq a7, t6, .LBB14_90 ; RV32I-NEXT: .LBB14_76: -; RV32I-NEXT: beq t1, s4, .LBB14_91 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bnez t5, .LBB14_91 ; RV32I-NEXT: .LBB14_77: -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: bne t1, s9, .LBB14_92 +; RV32I-NEXT: beq a7, a5, .LBB14_92 ; RV32I-NEXT: .LBB14_78: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s2, .LBB14_93 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: bne a7, s0, .LBB14_93 ; RV32I-NEXT: .LBB14_79: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s3, .LBB14_94 +; RV32I-NEXT: bne a7, s8, .LBB14_94 ; RV32I-NEXT: .LBB14_80: -; RV32I-NEXT: bnez a7, .LBB14_95 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s3, .LBB14_95 ; RV32I-NEXT: .LBB14_81: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bnez t5, .LBB14_96 +; RV32I-NEXT: bnez a4, .LBB14_96 ; RV32I-NEXT: .LBB14_82: -; RV32I-NEXT: beqz t1, .LBB14_97 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB14_97 ; RV32I-NEXT: .LBB14_83: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: j .LBB14_98 +; RV32I-NEXT: beqz a7, .LBB14_98 ; RV32I-NEXT: .LBB14_84: -; RV32I-NEXT: mv t3, s11 -; RV32I-NEXT: bne t1, s0, .LBB14_71 -; RV32I-NEXT: .LBB14_85: -; RV32I-NEXT: or a4, ra, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beqz t5, .LBB14_72 
+; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bnez t5, .LBB14_99 +; RV32I-NEXT: j .LBB14_100 +; RV32I-NEXT: .LBB14_85: +; RV32I-NEXT: mv t4, s11 +; RV32I-NEXT: bne a7, s1, .LBB14_72 ; RV32I-NEXT: .LBB14_86: -; RV32I-NEXT: lw t3, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne t1, t6, .LBB14_73 +; RV32I-NEXT: or t3, ra, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beqz t5, .LBB14_73 ; RV32I-NEXT: .LBB14_87: -; RV32I-NEXT: or a4, s10, t3 -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beqz t5, .LBB14_74 +; RV32I-NEXT: lw t4, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne a7, s4, .LBB14_74 ; RV32I-NEXT: .LBB14_88: -; RV32I-NEXT: mv t3, s7 -; RV32I-NEXT: bne t1, t4, .LBB14_75 +; RV32I-NEXT: or t3, s10, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beqz t5, .LBB14_75 ; RV32I-NEXT: .LBB14_89: -; RV32I-NEXT: or a4, s8, t3 -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beqz t5, .LBB14_76 +; RV32I-NEXT: mv t4, s7 +; RV32I-NEXT: bne a7, t6, .LBB14_76 ; RV32I-NEXT: .LBB14_90: -; RV32I-NEXT: mv t3, s5 -; RV32I-NEXT: bne t1, s4, .LBB14_77 +; RV32I-NEXT: or t3, s9, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beqz t5, .LBB14_77 ; RV32I-NEXT: .LBB14_91: -; RV32I-NEXT: or a4, s6, t3 -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: beq t1, s9, .LBB14_78 +; RV32I-NEXT: mv t4, s5 +; RV32I-NEXT: bne a7, a5, .LBB14_78 ; RV32I-NEXT: .LBB14_92: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s2, .LBB14_79 +; RV32I-NEXT: or t3, s6, t4 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: beq a7, s0, .LBB14_79 ; RV32I-NEXT: .LBB14_93: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s3, .LBB14_80 +; RV32I-NEXT: beq a7, s8, .LBB14_80 ; RV32I-NEXT: .LBB14_94: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: beqz a7, .LBB14_81 +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s3, .LBB14_81 ; RV32I-NEXT: .LBB14_95: -; RV32I-NEXT: mv a3, t3 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beqz t5, .LBB14_82 +; RV32I-NEXT: mv t4, t3 +; RV32I-NEXT: beqz a4, .LBB14_82 ; 
RV32I-NEXT: .LBB14_96: -; RV32I-NEXT: mv a4, s11 -; RV32I-NEXT: bnez t1, .LBB14_83 +; RV32I-NEXT: mv a3, t4 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB14_83 ; RV32I-NEXT: .LBB14_97: -; RV32I-NEXT: or a4, ra, a4 +; RV32I-NEXT: mv t3, s11 +; RV32I-NEXT: bnez a7, .LBB14_84 ; RV32I-NEXT: .LBB14_98: -; RV32I-NEXT: lw ra, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bnez t5, .LBB14_112 -; RV32I-NEXT: # %bb.99: -; RV32I-NEXT: beq t1, s0, .LBB14_113 +; RV32I-NEXT: or t3, ra, t3 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beqz t5, .LBB14_100 +; RV32I-NEXT: .LBB14_99: +; RV32I-NEXT: lw t4, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB14_100: -; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq a7, s1, .LBB14_113 +; RV32I-NEXT: # %bb.101: +; RV32I-NEXT: li t4, 0 ; RV32I-NEXT: bnez t5, .LBB14_114 -; RV32I-NEXT: .LBB14_101: -; RV32I-NEXT: beq t1, t6, .LBB14_115 ; RV32I-NEXT: .LBB14_102: -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bnez t5, .LBB14_116 +; RV32I-NEXT: beq a7, s4, .LBB14_115 ; RV32I-NEXT: .LBB14_103: -; RV32I-NEXT: beq t1, t4, .LBB14_117 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bnez t5, .LBB14_116 ; RV32I-NEXT: .LBB14_104: -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: bne t1, s4, .LBB14_118 +; RV32I-NEXT: beq a7, t6, .LBB14_117 ; RV32I-NEXT: .LBB14_105: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s9, .LBB14_119 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: bne a7, a5, .LBB14_118 ; RV32I-NEXT: .LBB14_106: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s2, .LBB14_120 +; RV32I-NEXT: bne a7, s0, .LBB14_119 ; RV32I-NEXT: .LBB14_107: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s3, .LBB14_121 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s8, .LBB14_120 ; RV32I-NEXT: .LBB14_108: -; RV32I-NEXT: bnez a7, .LBB14_122 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne a7, s3, .LBB14_121 ; RV32I-NEXT: .LBB14_109: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bnez t5, .LBB14_123 +; RV32I-NEXT: bnez a4, .LBB14_122 ; RV32I-NEXT: .LBB14_110: -; RV32I-NEXT: beqz t1, .LBB14_124 +; RV32I-NEXT: 
li t3, 0 +; RV32I-NEXT: bnez t5, .LBB14_123 ; RV32I-NEXT: .LBB14_111: -; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beqz a7, .LBB14_124 +; RV32I-NEXT: .LBB14_112: ; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: li t4, 0 ; RV32I-NEXT: bnez t5, .LBB14_125 ; RV32I-NEXT: j .LBB14_126 -; RV32I-NEXT: .LBB14_112: -; RV32I-NEXT: lw t3, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne t1, s0, .LBB14_100 ; RV32I-NEXT: .LBB14_113: -; RV32I-NEXT: or a4, s10, t3 -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beqz t5, .LBB14_101 +; RV32I-NEXT: or t3, s10, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beqz t5, .LBB14_102 ; RV32I-NEXT: .LBB14_114: -; RV32I-NEXT: mv t3, s7 -; RV32I-NEXT: bne t1, t6, .LBB14_102 +; RV32I-NEXT: mv t4, s7 +; RV32I-NEXT: bne a7, s4, .LBB14_103 ; RV32I-NEXT: .LBB14_115: -; RV32I-NEXT: or a4, s8, t3 -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beqz t5, .LBB14_103 +; RV32I-NEXT: or t3, s9, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beqz t5, .LBB14_104 ; RV32I-NEXT: .LBB14_116: -; RV32I-NEXT: mv t3, s5 -; RV32I-NEXT: bne t1, t4, .LBB14_104 +; RV32I-NEXT: mv t4, s5 +; RV32I-NEXT: bne a7, t6, .LBB14_105 ; RV32I-NEXT: .LBB14_117: -; RV32I-NEXT: or a4, s6, t3 -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: beq t1, s4, .LBB14_105 +; RV32I-NEXT: or t3, s6, t4 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: beq a7, a5, .LBB14_106 ; RV32I-NEXT: .LBB14_118: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s9, .LBB14_106 -; RV32I-NEXT: .LBB14_119: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s2, .LBB14_107 +; RV32I-NEXT: beq a7, s0, .LBB14_107 +; RV32I-NEXT: .LBB14_119: +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s8, .LBB14_108 ; RV32I-NEXT: .LBB14_120: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s3, .LBB14_108 +; RV32I-NEXT: mv t4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq a7, s3, .LBB14_109 ; RV32I-NEXT: .LBB14_121: -; RV32I-NEXT: mv a4, t3 -; RV32I-NEXT: beqz a7, .LBB14_109 +; 
RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: beqz a4, .LBB14_110 ; RV32I-NEXT: .LBB14_122: -; RV32I-NEXT: mv a5, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beqz t5, .LBB14_110 +; RV32I-NEXT: mv a6, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB14_111 ; RV32I-NEXT: .LBB14_123: -; RV32I-NEXT: lw a4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: bnez t1, .LBB14_111 +; RV32I-NEXT: lw t3, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bnez a7, .LBB14_112 ; RV32I-NEXT: .LBB14_124: -; RV32I-NEXT: or a4, s10, a4 -; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: or t3, s10, t3 +; RV32I-NEXT: li t4, 0 ; RV32I-NEXT: beqz t5, .LBB14_126 ; RV32I-NEXT: .LBB14_125: -; RV32I-NEXT: mv t3, s7 +; RV32I-NEXT: mv t4, s7 ; RV32I-NEXT: .LBB14_126: -; RV32I-NEXT: beq t1, s0, .LBB14_138 +; RV32I-NEXT: beq a7, s1, .LBB14_138 ; RV32I-NEXT: # %bb.127: -; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: li t4, 0 ; RV32I-NEXT: bnez t5, .LBB14_139 ; RV32I-NEXT: .LBB14_128: -; RV32I-NEXT: beq t1, t6, .LBB14_140 +; RV32I-NEXT: beq a7, s4, .LBB14_140 ; RV32I-NEXT: .LBB14_129: -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: bne t1, t4, .LBB14_141 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: bne a7, t6, .LBB14_141 ; RV32I-NEXT: .LBB14_130: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s4, .LBB14_142 -; RV32I-NEXT: .LBB14_131: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s9, .LBB14_143 +; RV32I-NEXT: bne a7, a5, .LBB14_142 +; RV32I-NEXT: .LBB14_131: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s0, .LBB14_143 ; RV32I-NEXT: .LBB14_132: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s2, .LBB14_144 -; RV32I-NEXT: .LBB14_133: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s3, .LBB14_145 +; RV32I-NEXT: bne a7, s8, .LBB14_144 +; RV32I-NEXT: .LBB14_133: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s3, .LBB14_145 ; RV32I-NEXT: .LBB14_134: -; RV32I-NEXT: bnez a7, .LBB14_146 +; RV32I-NEXT: bnez a4, .LBB14_146 ; RV32I-NEXT: .LBB14_135: -; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li t3, 0 ; RV32I-NEXT: bnez t5, .LBB14_147 ; RV32I-NEXT: .LBB14_136: -; 
RV32I-NEXT: beqz t1, .LBB14_148 +; RV32I-NEXT: beqz a7, .LBB14_148 ; RV32I-NEXT: .LBB14_137: -; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: li t4, 0 ; RV32I-NEXT: bnez t5, .LBB14_149 ; RV32I-NEXT: j .LBB14_150 ; RV32I-NEXT: .LBB14_138: -; RV32I-NEXT: or a4, s8, t3 -; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: or t3, s9, t4 +; RV32I-NEXT: li t4, 0 ; RV32I-NEXT: beqz t5, .LBB14_128 ; RV32I-NEXT: .LBB14_139: -; RV32I-NEXT: mv t3, s5 -; RV32I-NEXT: bne t1, t6, .LBB14_129 +; RV32I-NEXT: mv t4, s5 +; RV32I-NEXT: bne a7, s4, .LBB14_129 ; RV32I-NEXT: .LBB14_140: -; RV32I-NEXT: or a4, s6, t3 -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: beq t1, t4, .LBB14_130 +; RV32I-NEXT: or t3, s6, t4 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: beq a7, t6, .LBB14_130 ; RV32I-NEXT: .LBB14_141: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s4, .LBB14_131 -; RV32I-NEXT: .LBB14_142: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s9, .LBB14_132 +; RV32I-NEXT: beq a7, a5, .LBB14_131 +; RV32I-NEXT: .LBB14_142: +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s0, .LBB14_132 ; RV32I-NEXT: .LBB14_143: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s2, .LBB14_133 -; RV32I-NEXT: .LBB14_144: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s3, .LBB14_134 +; RV32I-NEXT: beq a7, s8, .LBB14_133 +; RV32I-NEXT: .LBB14_144: +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s3, .LBB14_134 ; RV32I-NEXT: .LBB14_145: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: beqz a7, .LBB14_135 +; RV32I-NEXT: mv t4, t3 +; RV32I-NEXT: beqz a4, .LBB14_135 ; RV32I-NEXT: .LBB14_146: -; RV32I-NEXT: mv a6, t3 -; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: mv t0, t4 +; RV32I-NEXT: li t3, 0 ; RV32I-NEXT: beqz t5, .LBB14_136 ; RV32I-NEXT: .LBB14_147: -; RV32I-NEXT: mv a4, s7 -; RV32I-NEXT: bnez t1, .LBB14_137 +; RV32I-NEXT: mv t3, s7 +; RV32I-NEXT: bnez a7, 
.LBB14_137 ; RV32I-NEXT: .LBB14_148: -; RV32I-NEXT: or a4, s8, a4 -; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: or t3, s9, t3 +; RV32I-NEXT: li t4, 0 ; RV32I-NEXT: beqz t5, .LBB14_150 ; RV32I-NEXT: .LBB14_149: -; RV32I-NEXT: mv t3, s5 +; RV32I-NEXT: mv t4, s5 ; RV32I-NEXT: .LBB14_150: -; RV32I-NEXT: beq t1, s0, .LBB14_161 +; RV32I-NEXT: beq a7, s1, .LBB14_161 ; RV32I-NEXT: # %bb.151: -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: bne t1, t6, .LBB14_162 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: bne a7, s4, .LBB14_162 ; RV32I-NEXT: .LBB14_152: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, t4, .LBB14_163 -; RV32I-NEXT: .LBB14_153: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s4, .LBB14_164 +; RV32I-NEXT: bne a7, t6, .LBB14_163 +; RV32I-NEXT: .LBB14_153: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, a5, .LBB14_164 ; RV32I-NEXT: .LBB14_154: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s9, .LBB14_165 -; RV32I-NEXT: .LBB14_155: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s2, .LBB14_166 +; RV32I-NEXT: bne a7, s0, .LBB14_165 +; RV32I-NEXT: .LBB14_155: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s8, .LBB14_166 ; RV32I-NEXT: .LBB14_156: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s3, .LBB14_167 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne a7, s3, .LBB14_167 ; RV32I-NEXT: .LBB14_157: -; RV32I-NEXT: bnez a7, .LBB14_168 +; RV32I-NEXT: bnez a4, .LBB14_168 ; RV32I-NEXT: .LBB14_158: -; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li t3, 0 ; RV32I-NEXT: bnez t5, .LBB14_169 ; RV32I-NEXT: .LBB14_159: -; RV32I-NEXT: beqz t1, .LBB14_170 +; RV32I-NEXT: beqz a7, .LBB14_170 ; RV32I-NEXT: .LBB14_160: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: bne t1, s0, .LBB14_171 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: bne a7, s1, .LBB14_171 ; RV32I-NEXT: j .LBB14_172 ; RV32I-NEXT: .LBB14_161: -; RV32I-NEXT: or a4, s6, t3 -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: beq t1, t6, .LBB14_152 +; RV32I-NEXT: or t3, s6, t4 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: beq a7, s4, .LBB14_152 
; RV32I-NEXT: .LBB14_162: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, t4, .LBB14_153 -; RV32I-NEXT: .LBB14_163: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s4, .LBB14_154 +; RV32I-NEXT: beq a7, t6, .LBB14_153 +; RV32I-NEXT: .LBB14_163: +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, a5, .LBB14_154 ; RV32I-NEXT: .LBB14_164: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s9, .LBB14_155 -; RV32I-NEXT: .LBB14_165: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s2, .LBB14_156 +; RV32I-NEXT: beq a7, s0, .LBB14_155 +; RV32I-NEXT: .LBB14_165: +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s8, .LBB14_156 ; RV32I-NEXT: .LBB14_166: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s3, .LBB14_157 +; RV32I-NEXT: mv t4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq a7, s3, .LBB14_157 ; RV32I-NEXT: .LBB14_167: -; RV32I-NEXT: mv a4, t3 -; RV32I-NEXT: beqz a7, .LBB14_158 +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: beqz a4, .LBB14_158 ; RV32I-NEXT: .LBB14_168: -; RV32I-NEXT: mv t0, a4 -; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: mv t1, t3 +; RV32I-NEXT: li t3, 0 ; RV32I-NEXT: beqz t5, .LBB14_159 ; RV32I-NEXT: .LBB14_169: -; RV32I-NEXT: mv a4, s5 -; RV32I-NEXT: bnez t1, .LBB14_160 +; RV32I-NEXT: mv t3, s5 +; RV32I-NEXT: bnez a7, .LBB14_160 ; RV32I-NEXT: .LBB14_170: -; RV32I-NEXT: or a4, s6, a4 -; RV32I-NEXT: mv t3, s1 -; RV32I-NEXT: beq t1, s0, .LBB14_172 +; RV32I-NEXT: or t3, s6, t3 +; RV32I-NEXT: mv t4, s2 +; RV32I-NEXT: beq a7, s1, .LBB14_172 ; RV32I-NEXT: .LBB14_171: -; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: .LBB14_172: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, t6, .LBB14_190 -; RV32I-NEXT: # %bb.173: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, t4, .LBB14_191 +; RV32I-NEXT: bne a7, s4, .LBB14_192 +; RV32I-NEXT: # %bb.173: +; RV32I-NEXT: li 
t4, 0 +; RV32I-NEXT: bne a7, t6, .LBB14_193 ; RV32I-NEXT: .LBB14_174: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s4, .LBB14_192 -; RV32I-NEXT: .LBB14_175: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s9, .LBB14_193 +; RV32I-NEXT: bne a7, a5, .LBB14_194 +; RV32I-NEXT: .LBB14_175: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s0, .LBB14_195 ; RV32I-NEXT: .LBB14_176: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s2, .LBB14_194 -; RV32I-NEXT: .LBB14_177: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s3, .LBB14_195 +; RV32I-NEXT: bne a7, s8, .LBB14_196 +; RV32I-NEXT: .LBB14_177: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s3, .LBB14_197 ; RV32I-NEXT: .LBB14_178: -; RV32I-NEXT: bnez a7, .LBB14_196 +; RV32I-NEXT: bnez a4, .LBB14_198 ; RV32I-NEXT: .LBB14_179: -; RV32I-NEXT: bnez t1, .LBB14_197 +; RV32I-NEXT: bnez a7, .LBB14_199 ; RV32I-NEXT: .LBB14_180: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s0, .LBB14_198 -; RV32I-NEXT: .LBB14_181: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, t6, .LBB14_199 +; RV32I-NEXT: bne a7, s1, .LBB14_200 +; RV32I-NEXT: .LBB14_181: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s4, .LBB14_183 ; RV32I-NEXT: .LBB14_182: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, t4, .LBB14_200 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: .LBB14_183: +; RV32I-NEXT: li t5, 4 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s4, .LBB14_201 -; RV32I-NEXT: .LBB14_184: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s9, .LBB14_202 +; RV32I-NEXT: beq a7, t6, .LBB14_185 +; RV32I-NEXT: # %bb.184: +; RV32I-NEXT: mv t3, t4 ; RV32I-NEXT: .LBB14_185: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: lw a5, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne a7, t5, .LBB14_201 +; RV32I-NEXT: # %bb.186: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bne t1, s2, .LBB14_203 -; RV32I-NEXT: .LBB14_186: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bne t1, s3, .LBB14_204 +; RV32I-NEXT: bne a7, s0, .LBB14_202 ; RV32I-NEXT: .LBB14_187: -; RV32I-NEXT: beqz a7, .LBB14_189 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: 
bne a7, s8, .LBB14_203 ; RV32I-NEXT: .LBB14_188: -; RV32I-NEXT: mv t2, a4 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne a7, s3, .LBB14_204 ; RV32I-NEXT: .LBB14_189: -; RV32I-NEXT: srli a4, ra, 16 -; RV32I-NEXT: lui t4, 16 -; RV32I-NEXT: srli t3, ra, 24 -; RV32I-NEXT: srli a7, a1, 16 -; RV32I-NEXT: srli t6, a1, 24 -; RV32I-NEXT: srli t1, a3, 16 -; RV32I-NEXT: srli s2, a3, 24 -; RV32I-NEXT: srli t5, a5, 16 -; RV32I-NEXT: srli s3, a5, 24 -; RV32I-NEXT: srli s1, a6, 16 -; RV32I-NEXT: srli s6, a6, 24 -; RV32I-NEXT: srli s0, t0, 16 -; RV32I-NEXT: srli s5, t0, 24 -; RV32I-NEXT: srli s4, a0, 16 -; RV32I-NEXT: srli s7, a0, 24 -; RV32I-NEXT: srli s8, t2, 16 -; RV32I-NEXT: srli s9, t2, 24 -; RV32I-NEXT: addi t4, t4, -1 -; RV32I-NEXT: and s10, ra, t4 -; RV32I-NEXT: and s11, a1, t4 -; RV32I-NEXT: srli s10, s10, 8 -; RV32I-NEXT: sb ra, 0(a2) -; RV32I-NEXT: sb s10, 1(a2) -; RV32I-NEXT: sb a4, 2(a2) -; RV32I-NEXT: sb t3, 3(a2) -; RV32I-NEXT: and a4, a3, t4 -; RV32I-NEXT: srli t3, s11, 8 +; RV32I-NEXT: beqz a4, .LBB14_191 +; RV32I-NEXT: .LBB14_190: +; RV32I-NEXT: mv t2, t3 +; RV32I-NEXT: .LBB14_191: +; RV32I-NEXT: lui a4, 16 +; RV32I-NEXT: addi a4, a4, -1 +; RV32I-NEXT: srli a7, a5, 16 +; RV32I-NEXT: and t3, a5, a4 +; RV32I-NEXT: srli t4, a5, 24 +; RV32I-NEXT: srli t3, t3, 8 +; RV32I-NEXT: sb a5, 0(a2) +; RV32I-NEXT: sb t3, 1(a2) +; RV32I-NEXT: sb a7, 2(a2) +; RV32I-NEXT: sb t4, 3(a2) +; RV32I-NEXT: srli a5, a1, 16 +; RV32I-NEXT: and a7, a1, a4 +; RV32I-NEXT: srli a7, a7, 8 +; RV32I-NEXT: srli t3, a1, 24 ; RV32I-NEXT: sb a1, 4(a2) -; RV32I-NEXT: sb t3, 5(a2) -; RV32I-NEXT: sb a7, 6(a2) -; RV32I-NEXT: sb t6, 7(a2) -; RV32I-NEXT: and a1, a5, t4 -; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: sb a7, 5(a2) +; RV32I-NEXT: sb a5, 6(a2) +; RV32I-NEXT: sb t3, 7(a2) +; RV32I-NEXT: srli a1, a3, 16 +; RV32I-NEXT: and a5, a3, a4 +; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: srli a7, a3, 24 ; RV32I-NEXT: sb a3, 8(a2) -; RV32I-NEXT: sb a4, 9(a2) -; RV32I-NEXT: sb t1, 10(a2) -; RV32I-NEXT: sb s2, 11(a2) 
-; RV32I-NEXT: and a3, a6, t4 -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a5, 12(a2) -; RV32I-NEXT: sb a1, 13(a2) -; RV32I-NEXT: sb t5, 14(a2) -; RV32I-NEXT: sb s3, 15(a2) -; RV32I-NEXT: and a1, t0, t4 +; RV32I-NEXT: sb a5, 9(a2) +; RV32I-NEXT: sb a1, 10(a2) +; RV32I-NEXT: sb a7, 11(a2) +; RV32I-NEXT: srli a1, a6, 16 +; RV32I-NEXT: and a3, a6, a4 ; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a6, 16(a2) +; RV32I-NEXT: srli a5, a6, 24 +; RV32I-NEXT: sb a6, 12(a2) +; RV32I-NEXT: sb a3, 13(a2) +; RV32I-NEXT: sb a1, 14(a2) +; RV32I-NEXT: sb a5, 15(a2) +; RV32I-NEXT: srli a1, t0, 16 +; RV32I-NEXT: and a3, t0, a4 +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: srli a5, t0, 24 +; RV32I-NEXT: sb t0, 16(a2) ; RV32I-NEXT: sb a3, 17(a2) -; RV32I-NEXT: sb s1, 18(a2) -; RV32I-NEXT: sb s6, 19(a2) -; RV32I-NEXT: and a3, a0, t4 -; RV32I-NEXT: and a4, t2, t4 -; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a1, 18(a2) +; RV32I-NEXT: sb a5, 19(a2) +; RV32I-NEXT: srli a1, t1, 16 +; RV32I-NEXT: and a3, t1, a4 ; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb t0, 20(a2) -; RV32I-NEXT: sb a1, 21(a2) -; RV32I-NEXT: sb s0, 22(a2) -; RV32I-NEXT: sb s5, 23(a2) +; RV32I-NEXT: srli a5, t1, 24 +; RV32I-NEXT: sb t1, 20(a2) +; RV32I-NEXT: sb a3, 21(a2) +; RV32I-NEXT: sb a1, 22(a2) +; RV32I-NEXT: sb a5, 23(a2) +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: and a3, a0, a4 +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: srli a5, a0, 24 ; RV32I-NEXT: sb a0, 24(a2) ; RV32I-NEXT: sb a3, 25(a2) -; RV32I-NEXT: sb s4, 26(a2) -; RV32I-NEXT: sb s7, 27(a2) +; RV32I-NEXT: sb a1, 26(a2) +; RV32I-NEXT: sb a5, 27(a2) +; RV32I-NEXT: srli a0, t2, 16 +; RV32I-NEXT: and a1, t2, a4 +; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: srli a3, t2, 24 ; RV32I-NEXT: sb t2, 28(a2) -; RV32I-NEXT: sb a4, 29(a2) -; RV32I-NEXT: sb s8, 30(a2) -; RV32I-NEXT: sb s9, 31(a2) +; RV32I-NEXT: sb a1, 29(a2) +; RV32I-NEXT: sb a0, 30(a2) +; RV32I-NEXT: sb a3, 31(a2) ; RV32I-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; 
RV32I-NEXT: lw s0, 56(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 52(sp) # 4-byte Folded Reload @@ -6058,64 +6001,57 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: lw s11, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB14_190: -; RV32I-NEXT: mv a4, t3 -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, t4, .LBB14_174 -; RV32I-NEXT: .LBB14_191: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s4, .LBB14_175 ; RV32I-NEXT: .LBB14_192: -; RV32I-NEXT: mv a4, t3 -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s9, .LBB14_176 +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, t6, .LBB14_174 ; RV32I-NEXT: .LBB14_193: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s2, .LBB14_177 -; RV32I-NEXT: .LBB14_194: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s3, .LBB14_178 +; RV32I-NEXT: beq a7, a5, .LBB14_175 +; RV32I-NEXT: .LBB14_194: +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s0, .LBB14_176 ; RV32I-NEXT: .LBB14_195: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: beqz a7, .LBB14_179 +; RV32I-NEXT: mv t4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq a7, s8, .LBB14_177 ; RV32I-NEXT: .LBB14_196: -; RV32I-NEXT: mv a0, t3 -; RV32I-NEXT: beqz t1, .LBB14_180 +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s3, .LBB14_178 ; RV32I-NEXT: .LBB14_197: -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s0, .LBB14_181 +; RV32I-NEXT: mv t4, t3 +; RV32I-NEXT: beqz a4, .LBB14_179 ; RV32I-NEXT: .LBB14_198: -; RV32I-NEXT: mv a4, s1 -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, t6, .LBB14_182 +; RV32I-NEXT: mv a0, t4 +; RV32I-NEXT: beqz a7, .LBB14_180 ; RV32I-NEXT: .LBB14_199: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, t4, .LBB14_183 -; RV32I-NEXT: .LBB14_200: -; RV32I-NEXT: mv a4, t3 +; 
RV32I-NEXT: li s2, 0 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s4, .LBB14_184 +; RV32I-NEXT: beq a7, s1, .LBB14_181 +; RV32I-NEXT: .LBB14_200: +; RV32I-NEXT: mv t3, s2 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: bne a7, s4, .LBB14_182 +; RV32I-NEXT: j .LBB14_183 ; RV32I-NEXT: .LBB14_201: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s9, .LBB14_185 -; RV32I-NEXT: .LBB14_202: -; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: beq t1, s2, .LBB14_186 +; RV32I-NEXT: beq a7, s0, .LBB14_187 +; RV32I-NEXT: .LBB14_202: +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: beq a7, s8, .LBB14_188 ; RV32I-NEXT: .LBB14_203: -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: beq t1, s3, .LBB14_187 +; RV32I-NEXT: mv t4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq a7, s3, .LBB14_189 ; RV32I-NEXT: .LBB14_204: -; RV32I-NEXT: mv a4, t3 -; RV32I-NEXT: bnez a7, .LBB14_188 -; RV32I-NEXT: j .LBB14_189 +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: bnez a4, .LBB14_190 +; RV32I-NEXT: j .LBB14_191 %src = load i256, ptr %src.ptr, align 1 %dwordOff = load i256, ptr %dwordOff.ptr, align 1 %bitOff = shl i256 %dwordOff, 6 @@ -6141,278 +6077,280 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: sd s9, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s10, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s11, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: lbu a4, 1(a0) -; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: lbu a6, 3(a0) -; RV64I-NEXT: lbu a7, 4(a0) -; RV64I-NEXT: lbu t0, 5(a0) -; RV64I-NEXT: lbu t1, 6(a0) -; RV64I-NEXT: lbu t2, 7(a0) -; RV64I-NEXT: lbu t3, 8(a0) -; RV64I-NEXT: lbu t4, 9(a0) -; RV64I-NEXT: lbu t5, 10(a0) -; RV64I-NEXT: lbu t6, 11(a0) +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 3(a0) +; RV64I-NEXT: lbu a5, 7(a0) +; RV64I-NEXT: lbu a6, 2(a0) +; RV64I-NEXT: lbu a7, 0(a0) +; RV64I-NEXT: lbu t0, 4(a0) +; RV64I-NEXT: lbu t1, 5(a0) +; 
RV64I-NEXT: lbu t2, 6(a0) +; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a4, a6 +; RV64I-NEXT: or a3, a3, a7 +; RV64I-NEXT: slli a4, a4, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or a5, a5, t2 +; RV64I-NEXT: or a6, t1, t0 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 12(a0) -; RV64I-NEXT: lbu s0, 13(a0) -; RV64I-NEXT: lbu s1, 14(a0) -; RV64I-NEXT: lbu s2, 15(a0) +; RV64I-NEXT: or a4, a5, a6 +; RV64I-NEXT: lbu a5, 8(a0) +; RV64I-NEXT: lbu a6, 9(a0) +; RV64I-NEXT: lbu a7, 10(a0) +; RV64I-NEXT: lbu t0, 11(a0) +; RV64I-NEXT: slli a4, a4, 32 +; RV64I-NEXT: lbu t1, 12(a0) +; RV64I-NEXT: lbu t2, 13(a0) +; RV64I-NEXT: lbu t3, 15(a0) +; RV64I-NEXT: lbu t4, 14(a0) +; RV64I-NEXT: or a4, a4, a3 ; RV64I-NEXT: slli a6, a6, 8 ; RV64I-NEXT: slli t0, t0, 8 -; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or s3, a6, a5 +; RV64I-NEXT: or a3, a6, a5 ; RV64I-NEXT: or a5, t0, a7 +; RV64I-NEXT: slli a6, a5, 16 +; RV64I-NEXT: slli t3, t3, 8 +; RV64I-NEXT: slli t2, t2, 8 +; RV64I-NEXT: or a5, t3, t4 ; RV64I-NEXT: or a7, t2, t1 -; RV64I-NEXT: or a6, t4, t3 -; RV64I-NEXT: lbu t0, 0(a1) +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: or a7, a5, a7 +; RV64I-NEXT: lbu t4, 19(a0) +; RV64I-NEXT: lbu t0, 21(a0) +; RV64I-NEXT: lbu a5, 22(a0) +; RV64I-NEXT: lbu t5, 23(a0) ; RV64I-NEXT: lbu t1, 1(a1) -; RV64I-NEXT: lbu t2, 2(a1) -; RV64I-NEXT: lbu t3, 3(a1) -; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: slli s0, s0, 8 -; RV64I-NEXT: slli s2, s2, 8 -; RV64I-NEXT: or t6, t6, t5 -; RV64I-NEXT: or s0, s0, a4 -; RV64I-NEXT: or s1, s2, s1 -; RV64I-NEXT: lbu a4, 4(a1) -; RV64I-NEXT: lbu t4, 5(a1) -; RV64I-NEXT: lbu t5, 6(a1) -; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: or a3, a6, a3 +; RV64I-NEXT: slli a7, a7, 32 +; RV64I-NEXT: or a3, a7, a3 +; RV64I-NEXT: lbu a6, 0(a1) +; RV64I-NEXT: lbu a7, 2(a1) +; RV64I-NEXT: lbu t2, 3(a1) +; RV64I-NEXT: slli t5, t5, 8 ; RV64I-NEXT: slli t1, t1, 
8 -; RV64I-NEXT: slli t3, t3, 8 -; RV64I-NEXT: slli t4, t4, 8 +; RV64I-NEXT: lbu t3, 4(a1) +; RV64I-NEXT: lbu t6, 5(a1) +; RV64I-NEXT: lbu s3, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: or a6, t1, a6 +; RV64I-NEXT: slli t2, t2, 8 +; RV64I-NEXT: or a7, t2, a7 +; RV64I-NEXT: lbu s2, 27(a0) +; RV64I-NEXT: lbu s1, 29(a0) +; RV64I-NEXT: lbu s0, 30(a0) +; RV64I-NEXT: lbu t1, 31(a0) ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or t0, t1, t0 -; RV64I-NEXT: or t1, t3, t2 -; RV64I-NEXT: or t2, t4, a4 -; RV64I-NEXT: or a1, a1, t5 -; RV64I-NEXT: lbu t5, 19(a0) -; RV64I-NEXT: lbu t4, 21(a0) -; RV64I-NEXT: lbu a4, 22(a0) -; RV64I-NEXT: lbu t3, 23(a0) -; RV64I-NEXT: slli s3, s3, 16 -; RV64I-NEXT: slli a7, a7, 16 -; RV64I-NEXT: slli t6, t6, 16 -; RV64I-NEXT: slli s1, s1, 16 -; RV64I-NEXT: or s4, s3, a3 -; RV64I-NEXT: or a5, a7, a5 -; RV64I-NEXT: or a6, t6, a6 -; RV64I-NEXT: or s0, s1, s0 -; RV64I-NEXT: lbu s1, 27(a0) -; RV64I-NEXT: lbu t6, 29(a0) -; RV64I-NEXT: lbu a3, 30(a0) -; RV64I-NEXT: lbu s2, 31(a0) -; RV64I-NEXT: slli t1, t1, 16 -; RV64I-NEXT: or s5, t1, t0 -; RV64I-NEXT: li a7, 128 +; RV64I-NEXT: slli t6, t6, 8 +; RV64I-NEXT: or a1, a1, s3 +; RV64I-NEXT: or t2, t6, t3 ; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: or a1, a1, t2 -; RV64I-NEXT: li t0, 64 -; RV64I-NEXT: slli s3, t3, 8 -; RV64I-NEXT: slli s2, s2, 8 -; RV64I-NEXT: slli a5, a5, 32 -; RV64I-NEXT: slli s0, s0, 32 +; RV64I-NEXT: or a6, a7, a6 ; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: or t1, a5, s4 -; RV64I-NEXT: or a5, s0, a6 -; RV64I-NEXT: or a6, a1, s5 +; RV64I-NEXT: slli s3, t1, 8 +; RV64I-NEXT: or a6, a1, a6 ; RV64I-NEXT: slli a6, a6, 3 -; RV64I-NEXT: sub t2, a6, t0 -; RV64I-NEXT: neg t3, a6 -; RV64I-NEXT: srl s0, t1, t3 -; RV64I-NEXT: bltu a6, t0, .LBB15_2 +; RV64I-NEXT: li t1, 64 +; RV64I-NEXT: neg t2, a6 +; RV64I-NEXT: sub t3, a6, t1 +; RV64I-NEXT: srl t6, a4, t2 +; RV64I-NEXT: bltu a6, t1, .LBB15_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: li a1, 0 -; RV64I-NEXT: sll s4, t1, 
t2 +; RV64I-NEXT: sll s4, a4, t3 ; RV64I-NEXT: j .LBB15_3 ; RV64I-NEXT: .LBB15_2: -; RV64I-NEXT: sll a1, t1, a6 -; RV64I-NEXT: sll s4, a5, a6 -; RV64I-NEXT: or s4, s0, s4 +; RV64I-NEXT: sll a7, a3, a6 +; RV64I-NEXT: sll a1, a4, a6 +; RV64I-NEXT: or s4, t6, a7 ; RV64I-NEXT: .LBB15_3: -; RV64I-NEXT: slli t5, t5, 8 -; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or s3, s3, a4 -; RV64I-NEXT: lbu ra, 17(a0) -; RV64I-NEXT: lbu s11, 18(a0) -; RV64I-NEXT: lbu s8, 20(a0) -; RV64I-NEXT: lbu s5, 25(a0) -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: lbu s7, 26(a0) -; RV64I-NEXT: lbu s6, 28(a0) -; RV64I-NEXT: slli s10, t6, 8 -; RV64I-NEXT: or s9, s2, a3 -; RV64I-NEXT: sub a4, a7, a6 -; RV64I-NEXT: mv a3, a5 +; RV64I-NEXT: slli s8, t4, 8 +; RV64I-NEXT: lbu s10, 17(a0) +; RV64I-NEXT: lbu s9, 18(a0) +; RV64I-NEXT: lbu t4, 20(a0) +; RV64I-NEXT: lbu s6, 25(a0) +; RV64I-NEXT: lbu s5, 26(a0) +; RV64I-NEXT: lbu s7, 28(a0) +; RV64I-NEXT: slli a7, t0, 8 +; RV64I-NEXT: or ra, t5, a5 +; RV64I-NEXT: slli s11, s2, 8 +; RV64I-NEXT: slli s2, s1, 8 +; RV64I-NEXT: or s3, s3, s0 +; RV64I-NEXT: li a5, 128 +; RV64I-NEXT: sub t0, a5, a6 +; RV64I-NEXT: mv a5, a3 ; RV64I-NEXT: beqz a6, .LBB15_5 ; RV64I-NEXT: # %bb.4: -; RV64I-NEXT: mv a3, s4 +; RV64I-NEXT: mv a5, s4 ; RV64I-NEXT: .LBB15_5: -; RV64I-NEXT: slli t6, ra, 8 -; RV64I-NEXT: or t5, t5, s11 -; RV64I-NEXT: or t4, t4, s8 -; RV64I-NEXT: slli s3, s3, 16 -; RV64I-NEXT: lbu s8, 16(a0) +; RV64I-NEXT: slli s0, s10, 8 +; RV64I-NEXT: or t5, s8, s9 +; RV64I-NEXT: lbu s1, 16(a0) ; RV64I-NEXT: lbu a0, 24(a0) -; RV64I-NEXT: slli s5, s5, 8 -; RV64I-NEXT: or s2, s1, s7 -; RV64I-NEXT: or s1, s10, s6 -; RV64I-NEXT: slli s4, s9, 16 -; RV64I-NEXT: bltu a4, t0, .LBB15_7 +; RV64I-NEXT: or t4, a7, t4 +; RV64I-NEXT: slli s8, ra, 16 +; RV64I-NEXT: slli s6, s6, 8 +; RV64I-NEXT: or s4, s11, s5 +; RV64I-NEXT: or s2, s2, s7 +; RV64I-NEXT: slli s3, s3, 16 +; RV64I-NEXT: bltu t0, t1, .LBB15_7 ; RV64I-NEXT: # %bb.6: -; RV64I-NEXT: sub s0, a4, t0 -; RV64I-NEXT: srl s0, a5, s0 +; 
RV64I-NEXT: sub a7, t0, t1 +; RV64I-NEXT: srl a7, a3, a7 ; RV64I-NEXT: j .LBB15_8 ; RV64I-NEXT: .LBB15_7: -; RV64I-NEXT: neg s6, a4 -; RV64I-NEXT: sll s6, a5, s6 -; RV64I-NEXT: or s0, s0, s6 +; RV64I-NEXT: neg a7, t0 +; RV64I-NEXT: sll a7, a3, a7 +; RV64I-NEXT: or a7, t6, a7 ; RV64I-NEXT: .LBB15_8: -; RV64I-NEXT: or t6, t6, s8 -; RV64I-NEXT: slli s6, t5, 16 -; RV64I-NEXT: or s3, s3, t4 -; RV64I-NEXT: or t5, s5, a0 -; RV64I-NEXT: slli s2, s2, 16 -; RV64I-NEXT: or s1, s4, s1 -; RV64I-NEXT: mv t4, t1 -; RV64I-NEXT: beqz a4, .LBB15_10 +; RV64I-NEXT: or s0, s0, s1 +; RV64I-NEXT: slli s1, t5, 16 +; RV64I-NEXT: or t6, s8, t4 +; RV64I-NEXT: or t5, s6, a0 +; RV64I-NEXT: slli s4, s4, 16 +; RV64I-NEXT: or s2, s3, s2 +; RV64I-NEXT: mv t4, a4 +; RV64I-NEXT: beqz t0, .LBB15_10 ; RV64I-NEXT: # %bb.9: -; RV64I-NEXT: mv t4, s0 +; RV64I-NEXT: mv t4, a7 ; RV64I-NEXT: .LBB15_10: -; RV64I-NEXT: or a0, s6, t6 -; RV64I-NEXT: slli s0, s3, 32 -; RV64I-NEXT: or t6, s2, t5 -; RV64I-NEXT: slli s1, s1, 32 -; RV64I-NEXT: bltu a4, t0, .LBB15_12 +; RV64I-NEXT: or a0, s1, s0 +; RV64I-NEXT: slli t6, t6, 32 +; RV64I-NEXT: or a7, s4, t5 +; RV64I-NEXT: slli s0, s2, 32 +; RV64I-NEXT: bltu t0, t1, .LBB15_12 ; RV64I-NEXT: # %bb.11: ; RV64I-NEXT: li t5, 0 ; RV64I-NEXT: j .LBB15_13 ; RV64I-NEXT: .LBB15_12: -; RV64I-NEXT: srl t5, a5, t3 +; RV64I-NEXT: srl t5, a3, t2 ; RV64I-NEXT: .LBB15_13: -; RV64I-NEXT: or a4, s0, a0 -; RV64I-NEXT: or a0, s1, t6 -; RV64I-NEXT: bltu a6, t0, .LBB15_15 +; RV64I-NEXT: or t0, t6, a0 +; RV64I-NEXT: or a0, s0, a7 +; RV64I-NEXT: bltu a6, t1, .LBB15_15 ; RV64I-NEXT: # %bb.14: -; RV64I-NEXT: li t6, 0 -; RV64I-NEXT: sll t2, a4, t2 +; RV64I-NEXT: li t2, 0 +; RV64I-NEXT: sll a7, t0, t3 ; RV64I-NEXT: j .LBB15_16 ; RV64I-NEXT: .LBB15_15: -; RV64I-NEXT: sll t6, a4, a6 -; RV64I-NEXT: srl t2, a4, t3 +; RV64I-NEXT: srl a7, t0, t2 ; RV64I-NEXT: sll t3, a0, a6 -; RV64I-NEXT: or t2, t2, t3 +; RV64I-NEXT: sll t2, t0, a6 +; RV64I-NEXT: or a7, a7, t3 ; RV64I-NEXT: .LBB15_16: -; RV64I-NEXT: sub 
s0, a6, a7 -; RV64I-NEXT: mv t3, a0 +; RV64I-NEXT: li t3, 128 +; RV64I-NEXT: sub s0, a6, t3 +; RV64I-NEXT: mv t6, a0 ; RV64I-NEXT: beqz a6, .LBB15_18 ; RV64I-NEXT: # %bb.17: -; RV64I-NEXT: mv t3, t2 +; RV64I-NEXT: mv t6, a7 ; RV64I-NEXT: .LBB15_18: -; RV64I-NEXT: bltu s0, t0, .LBB15_20 +; RV64I-NEXT: bltu s0, t1, .LBB15_20 ; RV64I-NEXT: # %bb.19: -; RV64I-NEXT: li t2, 0 -; RV64I-NEXT: sub t0, s0, t0 -; RV64I-NEXT: sll t0, t1, t0 +; RV64I-NEXT: li t3, 0 +; RV64I-NEXT: sub a7, s0, t1 +; RV64I-NEXT: sll a4, a4, a7 ; RV64I-NEXT: bnez s0, .LBB15_21 ; RV64I-NEXT: j .LBB15_22 ; RV64I-NEXT: .LBB15_20: -; RV64I-NEXT: sll t2, t1, s0 -; RV64I-NEXT: neg t0, s0 -; RV64I-NEXT: srl t0, t1, t0 -; RV64I-NEXT: sll t1, a5, s0 -; RV64I-NEXT: or t0, t0, t1 +; RV64I-NEXT: neg a7, s0 +; RV64I-NEXT: srl a7, a4, a7 +; RV64I-NEXT: sll t1, a3, s0 +; RV64I-NEXT: sll t3, a4, s0 +; RV64I-NEXT: or a4, a7, t1 ; RV64I-NEXT: beqz s0, .LBB15_22 ; RV64I-NEXT: .LBB15_21: -; RV64I-NEXT: mv a5, t0 +; RV64I-NEXT: mv a3, a4 ; RV64I-NEXT: .LBB15_22: -; RV64I-NEXT: bltu a6, a7, .LBB15_24 +; RV64I-NEXT: li a4, 128 +; RV64I-NEXT: bltu a6, a4, .LBB15_24 ; RV64I-NEXT: # %bb.23: ; RV64I-NEXT: li a1, 0 -; RV64I-NEXT: li a3, 0 +; RV64I-NEXT: li a5, 0 ; RV64I-NEXT: bnez a6, .LBB15_25 ; RV64I-NEXT: j .LBB15_26 ; RV64I-NEXT: .LBB15_24: -; RV64I-NEXT: or t2, t4, t6 -; RV64I-NEXT: or a5, t5, t3 +; RV64I-NEXT: or t3, t4, t2 +; RV64I-NEXT: or a3, t5, t6 ; RV64I-NEXT: beqz a6, .LBB15_26 ; RV64I-NEXT: .LBB15_25: -; RV64I-NEXT: mv a4, t2 -; RV64I-NEXT: mv a0, a5 +; RV64I-NEXT: mv t0, t3 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: .LBB15_26: -; RV64I-NEXT: srli a5, a1, 32 +; RV64I-NEXT: srli a4, a1, 32 +; RV64I-NEXT: lui a3, 16 ; RV64I-NEXT: srliw a6, a1, 16 -; RV64I-NEXT: lui t2, 16 -; RV64I-NEXT: srliw t1, a1, 24 -; RV64I-NEXT: srli t0, a1, 48 -; RV64I-NEXT: srli t5, a1, 56 -; RV64I-NEXT: srli a7, a3, 32 -; RV64I-NEXT: srliw t4, a3, 16 -; RV64I-NEXT: srliw s0, a3, 24 -; RV64I-NEXT: srli t6, a3, 48 -; RV64I-NEXT: srli s3, a3, 56 
-; RV64I-NEXT: srli t3, a4, 32 -; RV64I-NEXT: srliw s2, a4, 16 -; RV64I-NEXT: srliw s6, a4, 24 -; RV64I-NEXT: srli s4, a4, 48 -; RV64I-NEXT: srli s7, a4, 56 -; RV64I-NEXT: srli s1, a0, 32 -; RV64I-NEXT: srliw s5, a0, 16 -; RV64I-NEXT: srliw s8, a0, 24 -; RV64I-NEXT: srli s9, a0, 48 -; RV64I-NEXT: srli s10, a0, 56 -; RV64I-NEXT: addi t2, t2, -1 -; RV64I-NEXT: and s11, a1, t2 -; RV64I-NEXT: srli s11, s11, 8 +; RV64I-NEXT: addi a3, a3, -1 +; RV64I-NEXT: srliw a7, a1, 24 +; RV64I-NEXT: and t1, a1, a3 +; RV64I-NEXT: srli t1, t1, 8 ; RV64I-NEXT: sb a1, 0(a2) -; RV64I-NEXT: sb s11, 1(a2) +; RV64I-NEXT: sb t1, 1(a2) ; RV64I-NEXT: sb a6, 2(a2) -; RV64I-NEXT: sb t1, 3(a2) -; RV64I-NEXT: and a1, a5, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a5, 4(a2) -; RV64I-NEXT: sb a1, 5(a2) -; RV64I-NEXT: sb t0, 6(a2) -; RV64I-NEXT: sb t5, 7(a2) -; RV64I-NEXT: and a1, a3, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a3, 8(a2) -; RV64I-NEXT: sb a1, 9(a2) -; RV64I-NEXT: sb t4, 10(a2) -; RV64I-NEXT: sb s0, 11(a2) -; RV64I-NEXT: and a1, a7, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a7, 12(a2) -; RV64I-NEXT: sb a1, 13(a2) -; RV64I-NEXT: sb t6, 14(a2) -; RV64I-NEXT: sb s3, 15(a2) -; RV64I-NEXT: and a1, a4, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a4, 16(a2) -; RV64I-NEXT: sb a1, 17(a2) -; RV64I-NEXT: sb s2, 18(a2) -; RV64I-NEXT: sb s6, 19(a2) -; RV64I-NEXT: and a1, t3, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb t3, 20(a2) -; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: sb s4, 22(a2) -; RV64I-NEXT: sb s7, 23(a2) -; RV64I-NEXT: and a1, a0, t2 -; RV64I-NEXT: and a3, s1, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: sb a7, 3(a2) +; RV64I-NEXT: and a6, a4, a3 +; RV64I-NEXT: srli a7, a1, 48 +; RV64I-NEXT: srli a6, a6, 8 +; RV64I-NEXT: srli a1, a1, 56 +; RV64I-NEXT: sb a4, 4(a2) +; RV64I-NEXT: sb a6, 5(a2) +; RV64I-NEXT: sb a7, 6(a2) +; RV64I-NEXT: sb a1, 7(a2) +; RV64I-NEXT: srli a1, a5, 32 +; RV64I-NEXT: srliw a4, a5, 16 +; 
RV64I-NEXT: and a6, a5, a3 +; RV64I-NEXT: srli a6, a6, 8 +; RV64I-NEXT: srliw a7, a5, 24 +; RV64I-NEXT: sb a5, 8(a2) +; RV64I-NEXT: sb a6, 9(a2) +; RV64I-NEXT: sb a4, 10(a2) +; RV64I-NEXT: sb a7, 11(a2) +; RV64I-NEXT: srli a4, a5, 48 +; RV64I-NEXT: and a6, a1, a3 +; RV64I-NEXT: srli a6, a6, 8 +; RV64I-NEXT: srli a5, a5, 56 +; RV64I-NEXT: sb a1, 12(a2) +; RV64I-NEXT: sb a6, 13(a2) +; RV64I-NEXT: sb a4, 14(a2) +; RV64I-NEXT: sb a5, 15(a2) +; RV64I-NEXT: srli a1, t0, 32 +; RV64I-NEXT: and a4, t0, a3 +; RV64I-NEXT: srliw a5, t0, 16 +; RV64I-NEXT: srli a4, a4, 8 +; RV64I-NEXT: srliw a6, t0, 24 +; RV64I-NEXT: sb t0, 16(a2) +; RV64I-NEXT: sb a4, 17(a2) +; RV64I-NEXT: sb a5, 18(a2) +; RV64I-NEXT: sb a6, 19(a2) +; RV64I-NEXT: and a4, a1, a3 +; RV64I-NEXT: srli a5, t0, 48 +; RV64I-NEXT: srli a4, a4, 8 +; RV64I-NEXT: srli a6, t0, 56 +; RV64I-NEXT: sb a1, 20(a2) +; RV64I-NEXT: sb a4, 21(a2) +; RV64I-NEXT: sb a5, 22(a2) +; RV64I-NEXT: sb a6, 23(a2) +; RV64I-NEXT: srli a1, a0, 32 +; RV64I-NEXT: srliw a4, a0, 16 +; RV64I-NEXT: and a5, a0, a3 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srliw a6, a0, 24 ; RV64I-NEXT: sb a0, 24(a2) -; RV64I-NEXT: sb a1, 25(a2) -; RV64I-NEXT: sb s5, 26(a2) -; RV64I-NEXT: sb s8, 27(a2) -; RV64I-NEXT: sb s1, 28(a2) +; RV64I-NEXT: sb a5, 25(a2) +; RV64I-NEXT: sb a4, 26(a2) +; RV64I-NEXT: sb a6, 27(a2) +; RV64I-NEXT: srli a4, a0, 48 +; RV64I-NEXT: and a3, a1, a3 +; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: sb a1, 28(a2) ; RV64I-NEXT: sb a3, 29(a2) -; RV64I-NEXT: sb s9, 30(a2) -; RV64I-NEXT: sb s10, 31(a2) +; RV64I-NEXT: sb a4, 30(a2) +; RV64I-NEXT: sb a0, 31(a2) ; RV64I-NEXT: ld ra, 104(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 96(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 88(sp) # 8-byte Folded Reload @@ -6447,125 +6385,125 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sw s11, 28(sp) # 4-byte Folded Spill ; RV32I-NEXT: lbu a3, 1(a0) ; RV32I-NEXT: lbu a4, 0(a0) -; 
RV32I-NEXT: lbu a5, 2(a0) -; RV32I-NEXT: lbu a6, 3(a0) +; RV32I-NEXT: lbu a5, 3(a0) +; RV32I-NEXT: lbu a6, 2(a0) +; RV32I-NEXT: lbu a7, 3(a1) +; RV32I-NEXT: lbu t0, 1(a1) +; RV32I-NEXT: lbu t1, 2(a1) +; RV32I-NEXT: lbu a1, 0(a1) ; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: or a3, a3, a4 -; RV32I-NEXT: lbu a4, 0(a1) -; RV32I-NEXT: lbu a7, 1(a1) -; RV32I-NEXT: lbu t0, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a4, a5, a6 ; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a4, a7, a4 -; RV32I-NEXT: or a1, a1, t0 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a5, a7, t1 +; RV32I-NEXT: or a1, t0, a1 ; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: or a6, a5, a3 -; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: slli a4, a4, 16 +; RV32I-NEXT: or a1, a5, a1 +; RV32I-NEXT: or a6, a4, a3 ; RV32I-NEXT: slli a1, a1, 3 -; RV32I-NEXT: srli a5, a1, 5 -; RV32I-NEXT: sll t5, a6, a1 +; RV32I-NEXT: srli a4, a1, 5 +; RV32I-NEXT: sll t4, a6, a1 ; RV32I-NEXT: li s7, 1 -; RV32I-NEXT: mv a3, t5 -; RV32I-NEXT: beqz a5, .LBB15_2 +; RV32I-NEXT: mv a3, t4 +; RV32I-NEXT: beqz a4, .LBB15_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: .LBB15_2: -; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: li s8, 2 -; RV32I-NEXT: beq a5, s7, .LBB15_4 +; RV32I-NEXT: beq a4, s7, .LBB15_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: mv a4, a3 +; RV32I-NEXT: mv a5, a3 ; RV32I-NEXT: .LBB15_4: ; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: li s0, 3 -; RV32I-NEXT: beq a5, s8, .LBB15_6 +; RV32I-NEXT: li t6, 3 +; RV32I-NEXT: beq a4, s8, .LBB15_6 ; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: mv a7, a4 +; RV32I-NEXT: mv a7, a5 ; RV32I-NEXT: .LBB15_6: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: li s4, 4 -; RV32I-NEXT: beq a5, s0, .LBB15_8 +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: li s3, 4 +; RV32I-NEXT: beq a4, t6, .LBB15_8 ; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: mv a4, a7 +; 
RV32I-NEXT: mv a5, a7 ; RV32I-NEXT: .LBB15_8: ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: li s5, 5 -; RV32I-NEXT: beq a5, s4, .LBB15_10 +; RV32I-NEXT: li s4, 5 +; RV32I-NEXT: beq a4, s3, .LBB15_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv t0, a4 +; RV32I-NEXT: mv t0, a5 ; RV32I-NEXT: .LBB15_10: ; RV32I-NEXT: lbu t2, 7(a0) ; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: beq a5, s5, .LBB15_12 +; RV32I-NEXT: beq a4, s4, .LBB15_12 ; RV32I-NEXT: # %bb.11: ; RV32I-NEXT: mv a7, t0 ; RV32I-NEXT: .LBB15_12: -; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: lbu t0, 5(a0) ; RV32I-NEXT: lbu t1, 6(a0) -; RV32I-NEXT: li s1, 6 +; RV32I-NEXT: li s0, 6 ; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: beq a5, s1, .LBB15_14 +; RV32I-NEXT: beq a4, s0, .LBB15_14 ; RV32I-NEXT: # %bb.13: -; RV32I-NEXT: mv a4, a7 +; RV32I-NEXT: mv a5, a7 ; RV32I-NEXT: .LBB15_14: ; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: lbu a3, 4(a0) -; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: lbu t3, 4(a0) ; RV32I-NEXT: li ra, 7 +; RV32I-NEXT: or a3, t2, t1 ; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: beq a5, ra, .LBB15_16 +; RV32I-NEXT: beq a4, ra, .LBB15_16 ; RV32I-NEXT: # %bb.15: -; RV32I-NEXT: mv a7, a4 +; RV32I-NEXT: mv a7, a5 ; RV32I-NEXT: .LBB15_16: -; RV32I-NEXT: or a3, t0, a3 -; RV32I-NEXT: slli t1, t1, 16 -; RV32I-NEXT: andi t6, a1, 31 -; RV32I-NEXT: mv a4, a6 +; RV32I-NEXT: or t1, t0, t3 +; RV32I-NEXT: slli a3, a3, 16 +; RV32I-NEXT: andi t5, a1, 31 +; RV32I-NEXT: mv a5, a6 ; RV32I-NEXT: beqz a1, .LBB15_18 ; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: mv a4, a7 +; RV32I-NEXT: mv a5, a7 ; RV32I-NEXT: .LBB15_18: ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: or a7, t1, a3 -; RV32I-NEXT: neg s3, t6 -; RV32I-NEXT: srl s11, a6, s3 -; RV32I-NEXT: beqz t6, .LBB15_20 +; RV32I-NEXT: neg s2, t5 +; RV32I-NEXT: or a7, a3, t1 +; RV32I-NEXT: srl s11, a6, s2 +; RV32I-NEXT: beqz t5, .LBB15_20 ; RV32I-NEXT: # %bb.19: ; RV32I-NEXT: mv t0, s11 ; RV32I-NEXT: .LBB15_20: ; RV32I-NEXT: sll s10, a7, a1 -; RV32I-NEXT: beqz a5, .LBB15_22 +; RV32I-NEXT: beqz a4, 
.LBB15_22 ; RV32I-NEXT: # %bb.21: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: mv a6, t5 -; RV32I-NEXT: bne a5, s7, .LBB15_23 +; RV32I-NEXT: mv a6, t4 +; RV32I-NEXT: bne a4, s7, .LBB15_23 ; RV32I-NEXT: j .LBB15_24 ; RV32I-NEXT: .LBB15_22: ; RV32I-NEXT: or a3, s10, t0 -; RV32I-NEXT: mv a6, t5 -; RV32I-NEXT: beq a5, s7, .LBB15_24 +; RV32I-NEXT: mv a6, t4 +; RV32I-NEXT: beq a4, s7, .LBB15_24 ; RV32I-NEXT: .LBB15_23: ; RV32I-NEXT: mv a6, a3 ; RV32I-NEXT: .LBB15_24: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bne a5, s8, .LBB15_40 +; RV32I-NEXT: bne a4, s8, .LBB15_40 ; RV32I-NEXT: # %bb.25: ; RV32I-NEXT: li a6, 0 -; RV32I-NEXT: bne a5, s0, .LBB15_41 +; RV32I-NEXT: bne a4, t6, .LBB15_41 ; RV32I-NEXT: .LBB15_26: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beq a5, s4, .LBB15_28 +; RV32I-NEXT: beq a4, s3, .LBB15_28 ; RV32I-NEXT: .LBB15_27: ; RV32I-NEXT: mv a3, a6 ; RV32I-NEXT: .LBB15_28: ; RV32I-NEXT: lbu t2, 11(a0) ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: beq a5, s5, .LBB15_30 +; RV32I-NEXT: beq a4, s4, .LBB15_30 ; RV32I-NEXT: # %bb.29: ; RV32I-NEXT: mv t0, a3 ; RV32I-NEXT: .LBB15_30: @@ -6573,7 +6511,7 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: lbu t1, 9(a0) ; RV32I-NEXT: lbu a3, 10(a0) ; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: beq a5, s1, .LBB15_32 +; RV32I-NEXT: beq a4, s0, .LBB15_32 ; RV32I-NEXT: # %bb.31: ; RV32I-NEXT: mv a6, t0 ; RV32I-NEXT: .LBB15_32: @@ -6581,600 +6519,600 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: lbu t3, 8(a0) ; RV32I-NEXT: or t2, t2, a3 ; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: beq a5, ra, .LBB15_34 +; RV32I-NEXT: beq a4, ra, .LBB15_34 ; RV32I-NEXT: # %bb.33: ; RV32I-NEXT: mv t0, a6 ; RV32I-NEXT: .LBB15_34: ; RV32I-NEXT: or a3, t1, t3 -; RV32I-NEXT: slli a6, t2, 16 -; RV32I-NEXT: mv t2, a7 +; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: mv a6, a7 ; RV32I-NEXT: beqz a1, .LBB15_36 ; RV32I-NEXT: # %bb.35: -; RV32I-NEXT: mv t2, t0 +; RV32I-NEXT: mv a6, t0 ; 
RV32I-NEXT: .LBB15_36: +; RV32I-NEXT: sw a6, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: or a6, a6, a3 -; RV32I-NEXT: srl s2, a7, s3 -; RV32I-NEXT: beqz t6, .LBB15_38 +; RV32I-NEXT: or a6, t2, a3 +; RV32I-NEXT: srl s1, a7, s2 +; RV32I-NEXT: beqz t5, .LBB15_38 ; RV32I-NEXT: # %bb.37: -; RV32I-NEXT: mv t0, s2 +; RV32I-NEXT: mv t0, s1 ; RV32I-NEXT: .LBB15_38: ; RV32I-NEXT: sll s9, a6, a1 -; RV32I-NEXT: beqz a5, .LBB15_42 +; RV32I-NEXT: beqz a4, .LBB15_42 ; RV32I-NEXT: # %bb.39: ; RV32I-NEXT: li a7, 0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB15_43 +; RV32I-NEXT: bnez t5, .LBB15_43 ; RV32I-NEXT: j .LBB15_44 ; RV32I-NEXT: .LBB15_40: ; RV32I-NEXT: mv a3, a6 ; RV32I-NEXT: li a6, 0 -; RV32I-NEXT: beq a5, s0, .LBB15_26 +; RV32I-NEXT: beq a4, t6, .LBB15_26 ; RV32I-NEXT: .LBB15_41: ; RV32I-NEXT: mv a6, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bne a5, s4, .LBB15_27 +; RV32I-NEXT: bne a4, s3, .LBB15_27 ; RV32I-NEXT: j .LBB15_28 ; RV32I-NEXT: .LBB15_42: ; RV32I-NEXT: or a7, s9, t0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB15_44 +; RV32I-NEXT: beqz t5, .LBB15_44 ; RV32I-NEXT: .LBB15_43: ; RV32I-NEXT: mv a3, s11 ; RV32I-NEXT: .LBB15_44: -; RV32I-NEXT: beq a5, s7, .LBB15_61 +; RV32I-NEXT: beq a4, s7, .LBB15_61 ; RV32I-NEXT: # %bb.45: -; RV32I-NEXT: mv a3, t5 -; RV32I-NEXT: bne a5, s8, .LBB15_62 +; RV32I-NEXT: mv a3, t4 +; RV32I-NEXT: bne a4, s8, .LBB15_62 ; RV32I-NEXT: .LBB15_46: ; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: bne a5, s0, .LBB15_63 +; RV32I-NEXT: bne a4, t6, .LBB15_63 ; RV32I-NEXT: .LBB15_47: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beq a5, s4, .LBB15_49 +; RV32I-NEXT: beq a4, s3, .LBB15_49 ; RV32I-NEXT: .LBB15_48: ; RV32I-NEXT: mv a3, a7 ; RV32I-NEXT: .LBB15_49: -; RV32I-NEXT: lbu t3, 15(a0) +; RV32I-NEXT: lbu t2, 15(a0) ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: beq a5, s5, .LBB15_51 +; RV32I-NEXT: beq a4, s4, .LBB15_51 ; RV32I-NEXT: # %bb.50: ; RV32I-NEXT: mv t0, a3 ; RV32I-NEXT: .LBB15_51: ; RV32I-NEXT: li a7, 0 ; RV32I-NEXT: lbu t1, 
13(a0) ; RV32I-NEXT: lbu a3, 14(a0) -; RV32I-NEXT: slli t3, t3, 8 -; RV32I-NEXT: beq a5, s1, .LBB15_53 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: beq a4, s0, .LBB15_53 ; RV32I-NEXT: # %bb.52: ; RV32I-NEXT: mv a7, t0 ; RV32I-NEXT: .LBB15_53: ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: lbu t4, 12(a0) -; RV32I-NEXT: or t3, t3, a3 +; RV32I-NEXT: lbu t3, 12(a0) +; RV32I-NEXT: or t2, t2, a3 ; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: beq a5, ra, .LBB15_55 +; RV32I-NEXT: beq a4, ra, .LBB15_55 ; RV32I-NEXT: # %bb.54: ; RV32I-NEXT: mv t0, a7 ; RV32I-NEXT: .LBB15_55: -; RV32I-NEXT: or a3, t1, t4 -; RV32I-NEXT: slli t3, t3, 16 -; RV32I-NEXT: mv a7, a6 +; RV32I-NEXT: or a3, t1, t3 +; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: mv s5, a6 ; RV32I-NEXT: beqz a1, .LBB15_57 ; RV32I-NEXT: # %bb.56: -; RV32I-NEXT: mv a7, t0 +; RV32I-NEXT: mv s5, t0 ; RV32I-NEXT: .LBB15_57: -; RV32I-NEXT: sw a7, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: or a7, t3, a3 -; RV32I-NEXT: srl a3, a6, s3 +; RV32I-NEXT: or a7, t2, a3 +; RV32I-NEXT: srl a3, a6, s2 ; RV32I-NEXT: sw a3, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: beqz t6, .LBB15_59 +; RV32I-NEXT: beqz t5, .LBB15_59 ; RV32I-NEXT: # %bb.58: ; RV32I-NEXT: lw t0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB15_59: ; RV32I-NEXT: sll a3, a7, a1 ; RV32I-NEXT: sw a3, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: beqz a5, .LBB15_64 +; RV32I-NEXT: beqz a4, .LBB15_64 ; RV32I-NEXT: # %bb.60: ; RV32I-NEXT: li a6, 0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB15_65 +; RV32I-NEXT: bnez t5, .LBB15_65 ; RV32I-NEXT: j .LBB15_66 ; RV32I-NEXT: .LBB15_61: ; RV32I-NEXT: or a7, s10, a3 -; RV32I-NEXT: mv a3, t5 -; RV32I-NEXT: beq a5, s8, .LBB15_46 +; RV32I-NEXT: mv a3, t4 +; RV32I-NEXT: beq a4, s8, .LBB15_46 ; RV32I-NEXT: .LBB15_62: ; RV32I-NEXT: mv a3, a7 ; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: beq a5, s0, .LBB15_47 +; RV32I-NEXT: beq a4, t6, .LBB15_47 ; RV32I-NEXT: .LBB15_63: ; RV32I-NEXT: mv a7, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bne 
a5, s4, .LBB15_48 +; RV32I-NEXT: bne a4, s3, .LBB15_48 ; RV32I-NEXT: j .LBB15_49 ; RV32I-NEXT: .LBB15_64: ; RV32I-NEXT: or a6, a3, t0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB15_66 +; RV32I-NEXT: beqz t5, .LBB15_66 ; RV32I-NEXT: .LBB15_65: -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: .LBB15_66: -; RV32I-NEXT: beq a5, s7, .LBB15_84 +; RV32I-NEXT: beq a4, s7, .LBB15_84 ; RV32I-NEXT: # %bb.67: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB15_85 +; RV32I-NEXT: bnez t5, .LBB15_85 ; RV32I-NEXT: .LBB15_68: -; RV32I-NEXT: beq a5, s8, .LBB15_86 +; RV32I-NEXT: beq a4, s8, .LBB15_86 ; RV32I-NEXT: .LBB15_69: -; RV32I-NEXT: mv t0, t5 -; RV32I-NEXT: bne a5, s0, .LBB15_87 +; RV32I-NEXT: mv t0, t4 +; RV32I-NEXT: bne a4, t6, .LBB15_87 ; RV32I-NEXT: .LBB15_70: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beq a5, s4, .LBB15_72 +; RV32I-NEXT: beq a4, s3, .LBB15_72 ; RV32I-NEXT: .LBB15_71: ; RV32I-NEXT: mv a3, t0 ; RV32I-NEXT: .LBB15_72: -; RV32I-NEXT: lbu t3, 19(a0) +; RV32I-NEXT: lbu t2, 19(a0) ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: beq a5, s5, .LBB15_74 +; RV32I-NEXT: beq a4, s4, .LBB15_74 ; RV32I-NEXT: # %bb.73: ; RV32I-NEXT: mv t0, a3 ; RV32I-NEXT: .LBB15_74: ; RV32I-NEXT: li a6, 0 ; RV32I-NEXT: lbu t1, 17(a0) ; RV32I-NEXT: lbu a3, 18(a0) -; RV32I-NEXT: slli t3, t3, 8 -; RV32I-NEXT: beq a5, s1, .LBB15_76 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: beq a4, s0, .LBB15_76 ; RV32I-NEXT: # %bb.75: ; RV32I-NEXT: mv a6, t0 ; RV32I-NEXT: .LBB15_76: ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: lbu t4, 16(a0) -; RV32I-NEXT: or t3, t3, a3 +; RV32I-NEXT: lbu t3, 16(a0) +; RV32I-NEXT: or t2, t2, a3 ; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: beq a5, ra, .LBB15_78 +; RV32I-NEXT: beq a4, ra, .LBB15_78 ; RV32I-NEXT: # %bb.77: ; RV32I-NEXT: mv t0, a6 ; RV32I-NEXT: .LBB15_78: -; RV32I-NEXT: or a3, t1, t4 -; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: or a3, t1, t3 +; RV32I-NEXT: slli t2, t2, 16 ; RV32I-NEXT: mv s6, a7 ; RV32I-NEXT: beqz a1, .LBB15_80 ; RV32I-NEXT: # %bb.79: ; 
RV32I-NEXT: mv s6, t0 ; RV32I-NEXT: .LBB15_80: ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: or a6, t3, a3 -; RV32I-NEXT: srl a3, a7, s3 +; RV32I-NEXT: or a6, t2, a3 +; RV32I-NEXT: srl a3, a7, s2 ; RV32I-NEXT: sw a3, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: beqz t6, .LBB15_82 +; RV32I-NEXT: beqz t5, .LBB15_82 ; RV32I-NEXT: # %bb.81: ; RV32I-NEXT: lw t0, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB15_82: ; RV32I-NEXT: sll a3, a6, a1 ; RV32I-NEXT: sw a3, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: beqz a5, .LBB15_88 +; RV32I-NEXT: beqz a4, .LBB15_88 ; RV32I-NEXT: # %bb.83: ; RV32I-NEXT: li a7, 0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB15_89 +; RV32I-NEXT: bnez t5, .LBB15_89 ; RV32I-NEXT: j .LBB15_90 ; RV32I-NEXT: .LBB15_84: ; RV32I-NEXT: or a6, s9, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB15_68 +; RV32I-NEXT: beqz t5, .LBB15_68 ; RV32I-NEXT: .LBB15_85: ; RV32I-NEXT: mv a3, s11 -; RV32I-NEXT: bne a5, s8, .LBB15_69 +; RV32I-NEXT: bne a4, s8, .LBB15_69 ; RV32I-NEXT: .LBB15_86: ; RV32I-NEXT: or a6, s10, a3 -; RV32I-NEXT: mv t0, t5 -; RV32I-NEXT: beq a5, s0, .LBB15_70 +; RV32I-NEXT: mv t0, t4 +; RV32I-NEXT: beq a4, t6, .LBB15_70 ; RV32I-NEXT: .LBB15_87: ; RV32I-NEXT: mv t0, a6 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bne a5, s4, .LBB15_71 +; RV32I-NEXT: bne a4, s3, .LBB15_71 ; RV32I-NEXT: j .LBB15_72 ; RV32I-NEXT: .LBB15_88: ; RV32I-NEXT: or a7, a3, t0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB15_90 +; RV32I-NEXT: beqz t5, .LBB15_90 ; RV32I-NEXT: .LBB15_89: ; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB15_90: -; RV32I-NEXT: beq a5, s7, .LBB15_109 +; RV32I-NEXT: beq a4, s7, .LBB15_109 ; RV32I-NEXT: # %bb.91: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB15_110 +; RV32I-NEXT: bnez t5, .LBB15_110 ; RV32I-NEXT: .LBB15_92: -; RV32I-NEXT: beq a5, s8, .LBB15_111 +; RV32I-NEXT: beq a4, s8, .LBB15_111 ; RV32I-NEXT: .LBB15_93: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB15_112 +; RV32I-NEXT: bnez t5, .LBB15_112 ; 
RV32I-NEXT: .LBB15_94: -; RV32I-NEXT: beq a5, s0, .LBB15_113 +; RV32I-NEXT: beq a4, t6, .LBB15_113 ; RV32I-NEXT: .LBB15_95: -; RV32I-NEXT: mv a3, t5 -; RV32I-NEXT: beq a5, s4, .LBB15_97 +; RV32I-NEXT: mv a3, t4 +; RV32I-NEXT: beq a4, s3, .LBB15_97 ; RV32I-NEXT: .LBB15_96: ; RV32I-NEXT: mv a3, a7 ; RV32I-NEXT: .LBB15_97: -; RV32I-NEXT: lbu t3, 23(a0) +; RV32I-NEXT: lbu t2, 23(a0) ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: beq a5, s5, .LBB15_99 +; RV32I-NEXT: beq a4, s4, .LBB15_99 ; RV32I-NEXT: # %bb.98: ; RV32I-NEXT: mv t0, a3 ; RV32I-NEXT: .LBB15_99: ; RV32I-NEXT: li a7, 0 ; RV32I-NEXT: lbu t1, 21(a0) ; RV32I-NEXT: lbu a3, 22(a0) -; RV32I-NEXT: slli t3, t3, 8 -; RV32I-NEXT: beq a5, s1, .LBB15_101 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: beq a4, s0, .LBB15_101 ; RV32I-NEXT: # %bb.100: ; RV32I-NEXT: mv a7, t0 ; RV32I-NEXT: .LBB15_101: ; RV32I-NEXT: sw s9, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: lbu t4, 20(a0) -; RV32I-NEXT: or t3, t3, a3 +; RV32I-NEXT: lbu t3, 20(a0) +; RV32I-NEXT: or t2, t2, a3 ; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: beq a5, ra, .LBB15_103 +; RV32I-NEXT: beq a4, ra, .LBB15_103 ; RV32I-NEXT: # %bb.102: ; RV32I-NEXT: mv t0, a7 ; RV32I-NEXT: .LBB15_103: -; RV32I-NEXT: or a3, t1, t4 -; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: or a3, t1, t3 +; RV32I-NEXT: slli t2, t2, 16 ; RV32I-NEXT: mv s9, a6 ; RV32I-NEXT: beqz a1, .LBB15_105 ; RV32I-NEXT: # %bb.104: ; RV32I-NEXT: mv s9, t0 ; RV32I-NEXT: .LBB15_105: ; RV32I-NEXT: li t1, 0 -; RV32I-NEXT: or t0, t3, a3 -; RV32I-NEXT: srl a6, a6, s3 -; RV32I-NEXT: beqz t6, .LBB15_107 +; RV32I-NEXT: or t0, t2, a3 +; RV32I-NEXT: srl a6, a6, s2 +; RV32I-NEXT: beqz t5, .LBB15_107 ; RV32I-NEXT: # %bb.106: ; RV32I-NEXT: mv t1, a6 ; RV32I-NEXT: .LBB15_107: ; RV32I-NEXT: sll a3, t0, a1 ; RV32I-NEXT: sw a3, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: beqz a5, .LBB15_114 +; RV32I-NEXT: beqz a4, .LBB15_114 ; RV32I-NEXT: # %bb.108: ; RV32I-NEXT: li t1, 0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, 
.LBB15_115 +; RV32I-NEXT: bnez t5, .LBB15_115 ; RV32I-NEXT: j .LBB15_116 ; RV32I-NEXT: .LBB15_109: ; RV32I-NEXT: lw a7, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: or a7, a7, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB15_92 +; RV32I-NEXT: beqz t5, .LBB15_92 ; RV32I-NEXT: .LBB15_110: -; RV32I-NEXT: mv a3, s2 -; RV32I-NEXT: bne a5, s8, .LBB15_93 +; RV32I-NEXT: mv a3, s1 +; RV32I-NEXT: bne a4, s8, .LBB15_93 ; RV32I-NEXT: .LBB15_111: ; RV32I-NEXT: or a7, s9, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB15_94 +; RV32I-NEXT: beqz t5, .LBB15_94 ; RV32I-NEXT: .LBB15_112: ; RV32I-NEXT: mv a3, s11 -; RV32I-NEXT: bne a5, s0, .LBB15_95 +; RV32I-NEXT: bne a4, t6, .LBB15_95 ; RV32I-NEXT: .LBB15_113: ; RV32I-NEXT: or a7, s10, a3 -; RV32I-NEXT: mv a3, t5 -; RV32I-NEXT: bne a5, s4, .LBB15_96 +; RV32I-NEXT: mv a3, t4 +; RV32I-NEXT: bne a4, s3, .LBB15_96 ; RV32I-NEXT: j .LBB15_97 ; RV32I-NEXT: .LBB15_114: ; RV32I-NEXT: or t1, a3, t1 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB15_116 +; RV32I-NEXT: beqz t5, .LBB15_116 ; RV32I-NEXT: .LBB15_115: ; RV32I-NEXT: lw a3, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB15_116: -; RV32I-NEXT: beq a5, s7, .LBB15_136 +; RV32I-NEXT: beq a4, s7, .LBB15_136 ; RV32I-NEXT: # %bb.117: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB15_137 +; RV32I-NEXT: bnez t5, .LBB15_137 ; RV32I-NEXT: .LBB15_118: -; RV32I-NEXT: beq a5, s8, .LBB15_138 +; RV32I-NEXT: beq a4, s8, .LBB15_138 ; RV32I-NEXT: .LBB15_119: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB15_139 +; RV32I-NEXT: bnez t5, .LBB15_139 ; RV32I-NEXT: .LBB15_120: -; RV32I-NEXT: beq a5, s0, .LBB15_140 +; RV32I-NEXT: beq a4, t6, .LBB15_140 ; RV32I-NEXT: .LBB15_121: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB15_141 +; RV32I-NEXT: bnez t5, .LBB15_141 ; RV32I-NEXT: .LBB15_122: -; RV32I-NEXT: bne a5, s4, .LBB15_124 +; RV32I-NEXT: bne a4, s3, .LBB15_124 ; RV32I-NEXT: .LBB15_123: ; RV32I-NEXT: or t1, s10, a3 ; RV32I-NEXT: .LBB15_124: -; RV32I-NEXT: lbu s0, 27(a0) -; 
RV32I-NEXT: mv t3, t5 -; RV32I-NEXT: beq a5, s5, .LBB15_126 +; RV32I-NEXT: lbu t6, 27(a0) +; RV32I-NEXT: mv t2, t4 +; RV32I-NEXT: beq a4, s4, .LBB15_126 ; RV32I-NEXT: # %bb.125: -; RV32I-NEXT: mv t3, t1 +; RV32I-NEXT: mv t2, t1 ; RV32I-NEXT: .LBB15_126: ; RV32I-NEXT: li t1, 0 -; RV32I-NEXT: lbu t4, 25(a0) +; RV32I-NEXT: lbu t3, 25(a0) ; RV32I-NEXT: lbu a3, 26(a0) -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: beq a5, s1, .LBB15_128 +; RV32I-NEXT: slli t6, t6, 8 +; RV32I-NEXT: beq a4, s0, .LBB15_128 ; RV32I-NEXT: # %bb.127: -; RV32I-NEXT: mv t1, t3 +; RV32I-NEXT: mv t1, t2 ; RV32I-NEXT: .LBB15_128: -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: lbu s1, 24(a0) -; RV32I-NEXT: or s0, s0, a3 -; RV32I-NEXT: slli a3, t4, 8 -; RV32I-NEXT: beq a5, ra, .LBB15_130 +; RV32I-NEXT: li t2, 0 +; RV32I-NEXT: lbu s0, 24(a0) +; RV32I-NEXT: or t6, t6, a3 +; RV32I-NEXT: slli a3, t3, 8 +; RV32I-NEXT: beq a4, ra, .LBB15_130 ; RV32I-NEXT: # %bb.129: -; RV32I-NEXT: mv t3, t1 +; RV32I-NEXT: mv t2, t1 ; RV32I-NEXT: .LBB15_130: -; RV32I-NEXT: or a3, a3, s1 -; RV32I-NEXT: slli s0, s0, 16 +; RV32I-NEXT: or a3, a3, s0 +; RV32I-NEXT: slli t6, t6, 16 ; RV32I-NEXT: mv ra, t0 ; RV32I-NEXT: beqz a1, .LBB15_132 ; RV32I-NEXT: # %bb.131: -; RV32I-NEXT: mv ra, t3 +; RV32I-NEXT: mv ra, t2 ; RV32I-NEXT: .LBB15_132: -; RV32I-NEXT: li t4, 0 -; RV32I-NEXT: or t3, s0, a3 -; RV32I-NEXT: srl t0, t0, s3 -; RV32I-NEXT: li s1, 6 -; RV32I-NEXT: beqz t6, .LBB15_134 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: or t2, t6, a3 +; RV32I-NEXT: srl t0, t0, s2 +; RV32I-NEXT: li s0, 6 +; RV32I-NEXT: beqz t5, .LBB15_134 ; RV32I-NEXT: # %bb.133: -; RV32I-NEXT: mv t4, t0 +; RV32I-NEXT: mv t3, t0 ; RV32I-NEXT: .LBB15_134: -; RV32I-NEXT: sll t1, t3, a1 -; RV32I-NEXT: li s0, 3 -; RV32I-NEXT: beqz a5, .LBB15_142 +; RV32I-NEXT: sll t1, t2, a1 +; RV32I-NEXT: li t6, 3 +; RV32I-NEXT: beqz a4, .LBB15_142 ; RV32I-NEXT: # %bb.135: -; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: li t3, 0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB15_143 +; RV32I-NEXT: bnez 
t5, .LBB15_143 ; RV32I-NEXT: j .LBB15_144 ; RV32I-NEXT: .LBB15_136: ; RV32I-NEXT: lw a7, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: or t1, a7, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB15_118 +; RV32I-NEXT: beqz t5, .LBB15_118 ; RV32I-NEXT: .LBB15_137: ; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne a5, s8, .LBB15_119 +; RV32I-NEXT: bne a4, s8, .LBB15_119 ; RV32I-NEXT: .LBB15_138: ; RV32I-NEXT: lw a7, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: or t1, a7, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB15_120 +; RV32I-NEXT: beqz t5, .LBB15_120 ; RV32I-NEXT: .LBB15_139: -; RV32I-NEXT: mv a3, s2 -; RV32I-NEXT: bne a5, s0, .LBB15_121 +; RV32I-NEXT: mv a3, s1 +; RV32I-NEXT: bne a4, t6, .LBB15_121 ; RV32I-NEXT: .LBB15_140: ; RV32I-NEXT: lw a7, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: or t1, a7, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB15_122 +; RV32I-NEXT: beqz t5, .LBB15_122 ; RV32I-NEXT: .LBB15_141: ; RV32I-NEXT: mv a3, s11 -; RV32I-NEXT: beq a5, s4, .LBB15_123 +; RV32I-NEXT: beq a4, s3, .LBB15_123 ; RV32I-NEXT: j .LBB15_124 ; RV32I-NEXT: .LBB15_142: -; RV32I-NEXT: or t4, t1, t4 +; RV32I-NEXT: or t3, t1, t3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB15_144 +; RV32I-NEXT: beqz t5, .LBB15_144 ; RV32I-NEXT: .LBB15_143: ; RV32I-NEXT: mv a3, a6 ; RV32I-NEXT: .LBB15_144: -; RV32I-NEXT: beq a5, s7, .LBB15_166 +; RV32I-NEXT: beq a4, s7, .LBB15_166 ; RV32I-NEXT: # %bb.145: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB15_167 +; RV32I-NEXT: bnez t5, .LBB15_167 ; RV32I-NEXT: .LBB15_146: -; RV32I-NEXT: beq a5, s8, .LBB15_168 +; RV32I-NEXT: beq a4, s8, .LBB15_168 ; RV32I-NEXT: .LBB15_147: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB15_169 +; RV32I-NEXT: bnez t5, .LBB15_169 ; RV32I-NEXT: .LBB15_148: -; RV32I-NEXT: beq a5, s0, .LBB15_170 +; RV32I-NEXT: beq a4, t6, .LBB15_170 ; RV32I-NEXT: .LBB15_149: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB15_171 +; RV32I-NEXT: bnez t5, .LBB15_171 ; RV32I-NEXT: 
.LBB15_150: -; RV32I-NEXT: bne a5, s4, .LBB15_152 +; RV32I-NEXT: bne a4, s3, .LBB15_152 ; RV32I-NEXT: .LBB15_151: ; RV32I-NEXT: lw a7, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t4, a7, a3 +; RV32I-NEXT: or t3, a7, a3 ; RV32I-NEXT: .LBB15_152: ; RV32I-NEXT: li a7, 1 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB15_154 +; RV32I-NEXT: beqz t5, .LBB15_154 ; RV32I-NEXT: # %bb.153: ; RV32I-NEXT: mv a3, s11 ; RV32I-NEXT: .LBB15_154: ; RV32I-NEXT: li s7, 2 ; RV32I-NEXT: li s8, 3 -; RV32I-NEXT: bne a5, s5, .LBB15_156 +; RV32I-NEXT: bne a4, s4, .LBB15_156 ; RV32I-NEXT: # %bb.155: -; RV32I-NEXT: or t4, s10, a3 +; RV32I-NEXT: or t3, s10, a3 ; RV32I-NEXT: .LBB15_156: -; RV32I-NEXT: lbu s0, 31(a0) -; RV32I-NEXT: mv a3, t5 -; RV32I-NEXT: beq a5, s1, .LBB15_158 -; RV32I-NEXT: # %bb.157: +; RV32I-NEXT: lbu t6, 31(a0) ; RV32I-NEXT: mv a3, t4 +; RV32I-NEXT: beq a4, s0, .LBB15_158 +; RV32I-NEXT: # %bb.157: +; RV32I-NEXT: mv a3, t3 ; RV32I-NEXT: .LBB15_158: -; RV32I-NEXT: li t4, 0 -; RV32I-NEXT: lbu s5, 29(a0) -; RV32I-NEXT: lbu s1, 30(a0) -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: li s4, 7 -; RV32I-NEXT: beq a5, s4, .LBB15_160 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: lbu s4, 29(a0) +; RV32I-NEXT: lbu s0, 30(a0) +; RV32I-NEXT: slli t6, t6, 8 +; RV32I-NEXT: li s3, 7 +; RV32I-NEXT: beq a4, s3, .LBB15_160 ; RV32I-NEXT: # %bb.159: -; RV32I-NEXT: mv t4, a3 +; RV32I-NEXT: mv t3, a3 ; RV32I-NEXT: .LBB15_160: ; RV32I-NEXT: lbu a3, 28(a0) -; RV32I-NEXT: slli s5, s5, 8 -; RV32I-NEXT: or s0, s0, s1 -; RV32I-NEXT: mv a0, t3 +; RV32I-NEXT: slli s4, s4, 8 +; RV32I-NEXT: or t6, t6, s0 +; RV32I-NEXT: mv a0, t2 ; RV32I-NEXT: beqz a1, .LBB15_162 ; RV32I-NEXT: # %bb.161: -; RV32I-NEXT: mv a0, t4 +; RV32I-NEXT: mv a0, t3 ; RV32I-NEXT: .LBB15_162: -; RV32I-NEXT: li t4, 0 -; RV32I-NEXT: or a3, s5, a3 -; RV32I-NEXT: slli s0, s0, 16 -; RV32I-NEXT: li s1, 6 -; RV32I-NEXT: li s4, 4 -; RV32I-NEXT: beqz t6, .LBB15_164 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: or a3, s4, a3 +; RV32I-NEXT: slli t6, t6, 16 
+; RV32I-NEXT: li s0, 6 +; RV32I-NEXT: li s3, 4 +; RV32I-NEXT: beqz t5, .LBB15_164 ; RV32I-NEXT: # %bb.163: -; RV32I-NEXT: srl t4, t3, s3 +; RV32I-NEXT: srl t3, t2, s2 ; RV32I-NEXT: .LBB15_164: -; RV32I-NEXT: or s3, s0, a3 -; RV32I-NEXT: li s0, 5 -; RV32I-NEXT: beqz a5, .LBB15_172 +; RV32I-NEXT: or s2, t6, a3 +; RV32I-NEXT: li t6, 5 +; RV32I-NEXT: beqz a4, .LBB15_172 ; RV32I-NEXT: # %bb.165: -; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: li t2, 0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB15_173 +; RV32I-NEXT: bnez t5, .LBB15_173 ; RV32I-NEXT: j .LBB15_174 ; RV32I-NEXT: .LBB15_166: ; RV32I-NEXT: lw a7, 0(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t4, a7, a3 +; RV32I-NEXT: or t3, a7, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB15_146 +; RV32I-NEXT: beqz t5, .LBB15_146 ; RV32I-NEXT: .LBB15_167: ; RV32I-NEXT: lw a3, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne a5, s8, .LBB15_147 +; RV32I-NEXT: bne a4, s8, .LBB15_147 ; RV32I-NEXT: .LBB15_168: ; RV32I-NEXT: lw a7, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t4, a7, a3 +; RV32I-NEXT: or t3, a7, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB15_148 +; RV32I-NEXT: beqz t5, .LBB15_148 ; RV32I-NEXT: .LBB15_169: ; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne a5, s0, .LBB15_149 +; RV32I-NEXT: bne a4, t6, .LBB15_149 ; RV32I-NEXT: .LBB15_170: ; RV32I-NEXT: lw a7, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t4, a7, a3 +; RV32I-NEXT: or t3, a7, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB15_150 +; RV32I-NEXT: beqz t5, .LBB15_150 ; RV32I-NEXT: .LBB15_171: -; RV32I-NEXT: mv a3, s2 -; RV32I-NEXT: beq a5, s4, .LBB15_151 +; RV32I-NEXT: mv a3, s1 +; RV32I-NEXT: beq a4, s3, .LBB15_151 ; RV32I-NEXT: j .LBB15_152 ; RV32I-NEXT: .LBB15_172: -; RV32I-NEXT: sll a3, s3, a1 -; RV32I-NEXT: or t3, a3, t4 +; RV32I-NEXT: sll a3, s2, a1 +; RV32I-NEXT: or t2, a3, t3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB15_174 +; RV32I-NEXT: beqz t5, .LBB15_174 ; RV32I-NEXT: .LBB15_173: ; 
RV32I-NEXT: mv a3, t0 ; RV32I-NEXT: .LBB15_174: -; RV32I-NEXT: beq a5, a7, .LBB15_189 +; RV32I-NEXT: beq a4, a7, .LBB15_189 ; RV32I-NEXT: # %bb.175: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB15_190 +; RV32I-NEXT: bnez t5, .LBB15_190 ; RV32I-NEXT: .LBB15_176: -; RV32I-NEXT: beq a5, s7, .LBB15_191 +; RV32I-NEXT: beq a4, s7, .LBB15_191 ; RV32I-NEXT: .LBB15_177: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB15_192 +; RV32I-NEXT: bnez t5, .LBB15_192 ; RV32I-NEXT: .LBB15_178: -; RV32I-NEXT: beq a5, s8, .LBB15_193 +; RV32I-NEXT: beq a4, s8, .LBB15_193 ; RV32I-NEXT: .LBB15_179: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB15_194 +; RV32I-NEXT: bnez t5, .LBB15_194 ; RV32I-NEXT: .LBB15_180: -; RV32I-NEXT: beq a5, s4, .LBB15_195 +; RV32I-NEXT: beq a4, s3, .LBB15_195 ; RV32I-NEXT: .LBB15_181: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB15_196 +; RV32I-NEXT: bnez t5, .LBB15_196 ; RV32I-NEXT: .LBB15_182: -; RV32I-NEXT: beq a5, s0, .LBB15_197 +; RV32I-NEXT: beq a4, t6, .LBB15_197 ; RV32I-NEXT: .LBB15_183: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB15_198 +; RV32I-NEXT: bnez t5, .LBB15_198 ; RV32I-NEXT: .LBB15_184: -; RV32I-NEXT: beq a5, s1, .LBB15_199 +; RV32I-NEXT: beq a4, s0, .LBB15_199 ; RV32I-NEXT: .LBB15_185: ; RV32I-NEXT: li a3, 7 -; RV32I-NEXT: bne a5, a3, .LBB15_200 +; RV32I-NEXT: bne a4, a3, .LBB15_200 ; RV32I-NEXT: .LBB15_186: ; RV32I-NEXT: beqz a1, .LBB15_188 ; RV32I-NEXT: .LBB15_187: -; RV32I-NEXT: mv s3, t5 +; RV32I-NEXT: mv s2, t4 ; RV32I-NEXT: .LBB15_188: -; RV32I-NEXT: srli a1, a4, 16 -; RV32I-NEXT: lui a7, 16 -; RV32I-NEXT: srli a6, a4, 24 -; RV32I-NEXT: srli a3, t2, 16 -; RV32I-NEXT: srli t1, t2, 24 -; RV32I-NEXT: lw s2, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: srli a5, s2, 16 -; RV32I-NEXT: srli t5, s2, 24 -; RV32I-NEXT: srli t0, s6, 16 -; RV32I-NEXT: srli t6, s6, 24 -; RV32I-NEXT: srli t4, s9, 16 -; RV32I-NEXT: srli s4, s9, 24 -; RV32I-NEXT: srli t3, ra, 16 -; RV32I-NEXT: srli s1, ra, 24 -; RV32I-NEXT: srli s0, a0, 16 -; 
RV32I-NEXT: srli s5, a0, 24 -; RV32I-NEXT: srli s7, s3, 16 -; RV32I-NEXT: srli s8, s3, 24 -; RV32I-NEXT: addi a7, a7, -1 -; RV32I-NEXT: and s10, a4, a7 -; RV32I-NEXT: and s11, t2, a7 -; RV32I-NEXT: srli s10, s10, 8 -; RV32I-NEXT: sb a4, 0(a2) -; RV32I-NEXT: sb s10, 1(a2) -; RV32I-NEXT: sb a1, 2(a2) +; RV32I-NEXT: lui a1, 16 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: srli a3, a5, 16 +; RV32I-NEXT: and a4, a5, a1 +; RV32I-NEXT: srli a6, a5, 24 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: sb a5, 0(a2) +; RV32I-NEXT: sb a4, 1(a2) +; RV32I-NEXT: sb a3, 2(a2) ; RV32I-NEXT: sb a6, 3(a2) -; RV32I-NEXT: and a1, s2, a7 -; RV32I-NEXT: srli a4, s11, 8 -; RV32I-NEXT: sb t2, 4(a2) +; RV32I-NEXT: lw a6, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: srli a3, a6, 16 +; RV32I-NEXT: and a4, a6, a1 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a5, a6, 24 +; RV32I-NEXT: sb a6, 4(a2) ; RV32I-NEXT: sb a4, 5(a2) ; RV32I-NEXT: sb a3, 6(a2) -; RV32I-NEXT: sb t1, 7(a2) -; RV32I-NEXT: and a3, s6, a7 -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb s2, 8(a2) -; RV32I-NEXT: sb a1, 9(a2) -; RV32I-NEXT: sb a5, 10(a2) -; RV32I-NEXT: sb t5, 11(a2) -; RV32I-NEXT: and a1, s9, a7 -; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: sb a5, 7(a2) +; RV32I-NEXT: srli a3, s5, 16 +; RV32I-NEXT: and a4, s5, a1 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a5, s5, 24 +; RV32I-NEXT: sb s5, 8(a2) +; RV32I-NEXT: sb a4, 9(a2) +; RV32I-NEXT: sb a3, 10(a2) +; RV32I-NEXT: sb a5, 11(a2) +; RV32I-NEXT: srli a3, s6, 16 +; RV32I-NEXT: and a4, s6, a1 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a5, s6, 24 ; RV32I-NEXT: sb s6, 12(a2) -; RV32I-NEXT: sb a3, 13(a2) -; RV32I-NEXT: sb t0, 14(a2) -; RV32I-NEXT: sb t6, 15(a2) -; RV32I-NEXT: and a3, ra, a7 -; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a4, 13(a2) +; RV32I-NEXT: sb a3, 14(a2) +; RV32I-NEXT: sb a5, 15(a2) +; RV32I-NEXT: srli a3, s9, 16 +; RV32I-NEXT: and a4, s9, a1 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a5, s9, 24 ; RV32I-NEXT: sb s9, 16(a2) -; 
RV32I-NEXT: sb a1, 17(a2) -; RV32I-NEXT: sb t4, 18(a2) -; RV32I-NEXT: sb s4, 19(a2) -; RV32I-NEXT: and a1, a0, a7 -; RV32I-NEXT: and a4, s3, a7 -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a4, 17(a2) +; RV32I-NEXT: sb a3, 18(a2) +; RV32I-NEXT: sb a5, 19(a2) +; RV32I-NEXT: srli a3, ra, 16 +; RV32I-NEXT: and a4, ra, a1 ; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a5, ra, 24 ; RV32I-NEXT: sb ra, 20(a2) -; RV32I-NEXT: sb a3, 21(a2) -; RV32I-NEXT: sb t3, 22(a2) -; RV32I-NEXT: sb s1, 23(a2) +; RV32I-NEXT: sb a4, 21(a2) +; RV32I-NEXT: sb a3, 22(a2) +; RV32I-NEXT: sb a5, 23(a2) +; RV32I-NEXT: srli a3, a0, 16 +; RV32I-NEXT: and a4, a0, a1 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a5, a0, 24 ; RV32I-NEXT: sb a0, 24(a2) -; RV32I-NEXT: sb a1, 25(a2) -; RV32I-NEXT: sb s0, 26(a2) -; RV32I-NEXT: sb s5, 27(a2) -; RV32I-NEXT: sb s3, 28(a2) -; RV32I-NEXT: sb a4, 29(a2) -; RV32I-NEXT: sb s7, 30(a2) -; RV32I-NEXT: sb s8, 31(a2) +; RV32I-NEXT: sb a4, 25(a2) +; RV32I-NEXT: sb a3, 26(a2) +; RV32I-NEXT: sb a5, 27(a2) +; RV32I-NEXT: srli a0, s2, 16 +; RV32I-NEXT: and a1, s2, a1 +; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: srli a3, s2, 24 +; RV32I-NEXT: sb s2, 28(a2) +; RV32I-NEXT: sb a1, 29(a2) +; RV32I-NEXT: sb a0, 30(a2) +; RV32I-NEXT: sb a3, 31(a2) ; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 68(sp) # 4-byte Folded Reload @@ -7191,50 +7129,50 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: addi sp, sp, 80 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB15_189: -; RV32I-NEXT: or t3, t1, a3 +; RV32I-NEXT: or t2, t1, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB15_176 +; RV32I-NEXT: beqz t5, .LBB15_176 ; RV32I-NEXT: .LBB15_190: ; RV32I-NEXT: mv a3, a6 -; RV32I-NEXT: bne a5, s7, .LBB15_177 +; RV32I-NEXT: bne a4, s7, .LBB15_177 ; RV32I-NEXT: .LBB15_191: ; RV32I-NEXT: lw a6, 0(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t3, a6, 
a3 +; RV32I-NEXT: or t2, a6, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB15_178 +; RV32I-NEXT: beqz t5, .LBB15_178 ; RV32I-NEXT: .LBB15_192: ; RV32I-NEXT: lw a3, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne a5, s8, .LBB15_179 +; RV32I-NEXT: bne a4, s8, .LBB15_179 ; RV32I-NEXT: .LBB15_193: ; RV32I-NEXT: lw a6, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t3, a6, a3 +; RV32I-NEXT: or t2, a6, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB15_180 +; RV32I-NEXT: beqz t5, .LBB15_180 ; RV32I-NEXT: .LBB15_194: ; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne a5, s4, .LBB15_181 +; RV32I-NEXT: bne a4, s3, .LBB15_181 ; RV32I-NEXT: .LBB15_195: ; RV32I-NEXT: lw a6, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t3, a6, a3 +; RV32I-NEXT: or t2, a6, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB15_182 +; RV32I-NEXT: beqz t5, .LBB15_182 ; RV32I-NEXT: .LBB15_196: -; RV32I-NEXT: mv a3, s2 -; RV32I-NEXT: bne a5, s0, .LBB15_183 +; RV32I-NEXT: mv a3, s1 +; RV32I-NEXT: bne a4, t6, .LBB15_183 ; RV32I-NEXT: .LBB15_197: ; RV32I-NEXT: lw a6, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t3, a6, a3 +; RV32I-NEXT: or t2, a6, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB15_184 +; RV32I-NEXT: beqz t5, .LBB15_184 ; RV32I-NEXT: .LBB15_198: ; RV32I-NEXT: mv a3, s11 -; RV32I-NEXT: bne a5, s1, .LBB15_185 +; RV32I-NEXT: bne a4, s0, .LBB15_185 ; RV32I-NEXT: .LBB15_199: -; RV32I-NEXT: or t3, s10, a3 +; RV32I-NEXT: or t2, s10, a3 ; RV32I-NEXT: li a3, 7 -; RV32I-NEXT: beq a5, a3, .LBB15_186 +; RV32I-NEXT: beq a4, a3, .LBB15_186 ; RV32I-NEXT: .LBB15_200: -; RV32I-NEXT: mv t5, t3 +; RV32I-NEXT: mv t4, t2 ; RV32I-NEXT: bnez a1, .LBB15_187 ; RV32I-NEXT: j .LBB15_188 %src = load i256, ptr %src.ptr, align 1 @@ -7262,278 +7200,280 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV64I-NEXT: sd s9, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s10, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s11, 8(sp) # 8-byte 
Folded Spill -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: lbu a4, 1(a0) -; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: lbu a6, 3(a0) -; RV64I-NEXT: lbu a7, 4(a0) -; RV64I-NEXT: lbu t0, 5(a0) -; RV64I-NEXT: lbu t1, 6(a0) -; RV64I-NEXT: lbu t2, 7(a0) -; RV64I-NEXT: lbu t3, 8(a0) -; RV64I-NEXT: lbu t4, 9(a0) -; RV64I-NEXT: lbu t5, 10(a0) -; RV64I-NEXT: lbu t6, 11(a0) +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 3(a0) +; RV64I-NEXT: lbu a5, 7(a0) +; RV64I-NEXT: lbu a6, 2(a0) +; RV64I-NEXT: lbu a7, 0(a0) +; RV64I-NEXT: lbu t0, 4(a0) +; RV64I-NEXT: lbu t1, 5(a0) +; RV64I-NEXT: lbu t2, 6(a0) +; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a4, a6 +; RV64I-NEXT: or a3, a3, a7 +; RV64I-NEXT: slli a4, a4, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or a5, a5, t2 +; RV64I-NEXT: or a6, t1, t0 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 12(a0) -; RV64I-NEXT: lbu s0, 13(a0) -; RV64I-NEXT: lbu s1, 14(a0) -; RV64I-NEXT: lbu s2, 15(a0) +; RV64I-NEXT: or a4, a5, a6 +; RV64I-NEXT: lbu a5, 8(a0) +; RV64I-NEXT: lbu a6, 9(a0) +; RV64I-NEXT: lbu a7, 10(a0) +; RV64I-NEXT: lbu t0, 11(a0) +; RV64I-NEXT: slli a4, a4, 32 +; RV64I-NEXT: lbu t1, 12(a0) +; RV64I-NEXT: lbu t2, 13(a0) +; RV64I-NEXT: lbu t3, 15(a0) +; RV64I-NEXT: lbu t4, 14(a0) +; RV64I-NEXT: or a4, a4, a3 ; RV64I-NEXT: slli a6, a6, 8 ; RV64I-NEXT: slli t0, t0, 8 -; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or s3, a6, a5 +; RV64I-NEXT: or a3, a6, a5 ; RV64I-NEXT: or a5, t0, a7 +; RV64I-NEXT: slli a6, a5, 16 +; RV64I-NEXT: slli t3, t3, 8 +; RV64I-NEXT: slli t2, t2, 8 +; RV64I-NEXT: or a5, t3, t4 ; RV64I-NEXT: or a7, t2, t1 -; RV64I-NEXT: or a6, t4, t3 -; RV64I-NEXT: lbu t0, 0(a1) +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: or a7, a5, a7 +; RV64I-NEXT: lbu t4, 19(a0) +; RV64I-NEXT: lbu t0, 21(a0) +; RV64I-NEXT: lbu a5, 22(a0) +; RV64I-NEXT: lbu t5, 23(a0) ; RV64I-NEXT: lbu t1, 1(a1) -; RV64I-NEXT: 
lbu t2, 2(a1) -; RV64I-NEXT: lbu t3, 3(a1) -; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: slli s0, s0, 8 -; RV64I-NEXT: slli s2, s2, 8 -; RV64I-NEXT: or t6, t6, t5 -; RV64I-NEXT: or s0, s0, a4 -; RV64I-NEXT: or s1, s2, s1 -; RV64I-NEXT: lbu a4, 4(a1) -; RV64I-NEXT: lbu t4, 5(a1) -; RV64I-NEXT: lbu t5, 6(a1) -; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: or a3, a6, a3 +; RV64I-NEXT: slli a7, a7, 32 +; RV64I-NEXT: or a3, a7, a3 +; RV64I-NEXT: lbu a6, 0(a1) +; RV64I-NEXT: lbu a7, 2(a1) +; RV64I-NEXT: lbu t2, 3(a1) +; RV64I-NEXT: slli t5, t5, 8 ; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: slli t3, t3, 8 -; RV64I-NEXT: slli t4, t4, 8 +; RV64I-NEXT: lbu t3, 4(a1) +; RV64I-NEXT: lbu t6, 5(a1) +; RV64I-NEXT: lbu s3, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: or a6, t1, a6 +; RV64I-NEXT: slli t2, t2, 8 +; RV64I-NEXT: or a7, t2, a7 +; RV64I-NEXT: lbu s2, 27(a0) +; RV64I-NEXT: lbu s1, 29(a0) +; RV64I-NEXT: lbu s0, 30(a0) +; RV64I-NEXT: lbu t1, 31(a0) ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or t0, t1, t0 -; RV64I-NEXT: or t1, t3, t2 -; RV64I-NEXT: or t2, t4, a4 -; RV64I-NEXT: or a1, a1, t5 -; RV64I-NEXT: lbu t5, 19(a0) -; RV64I-NEXT: lbu t4, 21(a0) -; RV64I-NEXT: lbu a4, 22(a0) -; RV64I-NEXT: lbu t3, 23(a0) -; RV64I-NEXT: slli s3, s3, 16 -; RV64I-NEXT: slli a7, a7, 16 -; RV64I-NEXT: slli t6, t6, 16 -; RV64I-NEXT: slli s1, s1, 16 -; RV64I-NEXT: or s4, s3, a3 -; RV64I-NEXT: or a5, a7, a5 -; RV64I-NEXT: or a6, t6, a6 -; RV64I-NEXT: or s0, s1, s0 -; RV64I-NEXT: lbu s1, 27(a0) -; RV64I-NEXT: lbu t6, 29(a0) -; RV64I-NEXT: lbu a3, 30(a0) -; RV64I-NEXT: lbu s2, 31(a0) -; RV64I-NEXT: slli t1, t1, 16 -; RV64I-NEXT: or s5, t1, t0 -; RV64I-NEXT: li a7, 128 +; RV64I-NEXT: slli t6, t6, 8 +; RV64I-NEXT: or a1, a1, s3 +; RV64I-NEXT: or t2, t6, t3 ; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: or a1, a1, t2 -; RV64I-NEXT: li t0, 64 -; RV64I-NEXT: slli s3, t3, 8 -; RV64I-NEXT: slli s2, s2, 8 -; RV64I-NEXT: slli a5, a5, 32 -; RV64I-NEXT: slli s0, s0, 32 +; 
RV64I-NEXT: or a6, a7, a6 ; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: or t1, a5, s4 -; RV64I-NEXT: or a5, s0, a6 -; RV64I-NEXT: or a6, a1, s5 +; RV64I-NEXT: slli s3, t1, 8 +; RV64I-NEXT: or a6, a1, a6 ; RV64I-NEXT: slli a6, a6, 5 -; RV64I-NEXT: sub t2, a6, t0 -; RV64I-NEXT: neg t3, a6 -; RV64I-NEXT: srl s0, t1, t3 -; RV64I-NEXT: bltu a6, t0, .LBB16_2 +; RV64I-NEXT: li t1, 64 +; RV64I-NEXT: neg t2, a6 +; RV64I-NEXT: sub t3, a6, t1 +; RV64I-NEXT: srl t6, a4, t2 +; RV64I-NEXT: bltu a6, t1, .LBB16_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: li a1, 0 -; RV64I-NEXT: sll s4, t1, t2 +; RV64I-NEXT: sll s4, a4, t3 ; RV64I-NEXT: j .LBB16_3 ; RV64I-NEXT: .LBB16_2: -; RV64I-NEXT: sll a1, t1, a6 -; RV64I-NEXT: sll s4, a5, a6 -; RV64I-NEXT: or s4, s0, s4 +; RV64I-NEXT: sll a7, a3, a6 +; RV64I-NEXT: sll a1, a4, a6 +; RV64I-NEXT: or s4, t6, a7 ; RV64I-NEXT: .LBB16_3: -; RV64I-NEXT: slli t5, t5, 8 -; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or s3, s3, a4 -; RV64I-NEXT: lbu ra, 17(a0) -; RV64I-NEXT: lbu s11, 18(a0) -; RV64I-NEXT: lbu s8, 20(a0) -; RV64I-NEXT: lbu s5, 25(a0) -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: lbu s7, 26(a0) -; RV64I-NEXT: lbu s6, 28(a0) -; RV64I-NEXT: slli s10, t6, 8 -; RV64I-NEXT: or s9, s2, a3 -; RV64I-NEXT: sub a4, a7, a6 -; RV64I-NEXT: mv a3, a5 +; RV64I-NEXT: slli s8, t4, 8 +; RV64I-NEXT: lbu s10, 17(a0) +; RV64I-NEXT: lbu s9, 18(a0) +; RV64I-NEXT: lbu t4, 20(a0) +; RV64I-NEXT: lbu s6, 25(a0) +; RV64I-NEXT: lbu s5, 26(a0) +; RV64I-NEXT: lbu s7, 28(a0) +; RV64I-NEXT: slli a7, t0, 8 +; RV64I-NEXT: or ra, t5, a5 +; RV64I-NEXT: slli s11, s2, 8 +; RV64I-NEXT: slli s2, s1, 8 +; RV64I-NEXT: or s3, s3, s0 +; RV64I-NEXT: li a5, 128 +; RV64I-NEXT: sub t0, a5, a6 +; RV64I-NEXT: mv a5, a3 ; RV64I-NEXT: beqz a6, .LBB16_5 ; RV64I-NEXT: # %bb.4: -; RV64I-NEXT: mv a3, s4 +; RV64I-NEXT: mv a5, s4 ; RV64I-NEXT: .LBB16_5: -; RV64I-NEXT: slli t6, ra, 8 -; RV64I-NEXT: or t5, t5, s11 -; RV64I-NEXT: or t4, t4, s8 -; RV64I-NEXT: slli s3, s3, 16 -; RV64I-NEXT: lbu s8, 16(a0) +; 
RV64I-NEXT: slli s0, s10, 8 +; RV64I-NEXT: or t5, s8, s9 +; RV64I-NEXT: lbu s1, 16(a0) ; RV64I-NEXT: lbu a0, 24(a0) -; RV64I-NEXT: slli s5, s5, 8 -; RV64I-NEXT: or s2, s1, s7 -; RV64I-NEXT: or s1, s10, s6 -; RV64I-NEXT: slli s4, s9, 16 -; RV64I-NEXT: bltu a4, t0, .LBB16_7 +; RV64I-NEXT: or t4, a7, t4 +; RV64I-NEXT: slli s8, ra, 16 +; RV64I-NEXT: slli s6, s6, 8 +; RV64I-NEXT: or s4, s11, s5 +; RV64I-NEXT: or s2, s2, s7 +; RV64I-NEXT: slli s3, s3, 16 +; RV64I-NEXT: bltu t0, t1, .LBB16_7 ; RV64I-NEXT: # %bb.6: -; RV64I-NEXT: sub s0, a4, t0 -; RV64I-NEXT: srl s0, a5, s0 +; RV64I-NEXT: sub a7, t0, t1 +; RV64I-NEXT: srl a7, a3, a7 ; RV64I-NEXT: j .LBB16_8 ; RV64I-NEXT: .LBB16_7: -; RV64I-NEXT: neg s6, a4 -; RV64I-NEXT: sll s6, a5, s6 -; RV64I-NEXT: or s0, s0, s6 +; RV64I-NEXT: neg a7, t0 +; RV64I-NEXT: sll a7, a3, a7 +; RV64I-NEXT: or a7, t6, a7 ; RV64I-NEXT: .LBB16_8: -; RV64I-NEXT: or t6, t6, s8 -; RV64I-NEXT: slli s6, t5, 16 -; RV64I-NEXT: or s3, s3, t4 -; RV64I-NEXT: or t5, s5, a0 -; RV64I-NEXT: slli s2, s2, 16 -; RV64I-NEXT: or s1, s4, s1 -; RV64I-NEXT: mv t4, t1 -; RV64I-NEXT: beqz a4, .LBB16_10 +; RV64I-NEXT: or s0, s0, s1 +; RV64I-NEXT: slli s1, t5, 16 +; RV64I-NEXT: or t6, s8, t4 +; RV64I-NEXT: or t5, s6, a0 +; RV64I-NEXT: slli s4, s4, 16 +; RV64I-NEXT: or s2, s3, s2 +; RV64I-NEXT: mv t4, a4 +; RV64I-NEXT: beqz t0, .LBB16_10 ; RV64I-NEXT: # %bb.9: -; RV64I-NEXT: mv t4, s0 +; RV64I-NEXT: mv t4, a7 ; RV64I-NEXT: .LBB16_10: -; RV64I-NEXT: or a0, s6, t6 -; RV64I-NEXT: slli s0, s3, 32 -; RV64I-NEXT: or t6, s2, t5 -; RV64I-NEXT: slli s1, s1, 32 -; RV64I-NEXT: bltu a4, t0, .LBB16_12 +; RV64I-NEXT: or a0, s1, s0 +; RV64I-NEXT: slli t6, t6, 32 +; RV64I-NEXT: or a7, s4, t5 +; RV64I-NEXT: slli s0, s2, 32 +; RV64I-NEXT: bltu t0, t1, .LBB16_12 ; RV64I-NEXT: # %bb.11: ; RV64I-NEXT: li t5, 0 ; RV64I-NEXT: j .LBB16_13 ; RV64I-NEXT: .LBB16_12: -; RV64I-NEXT: srl t5, a5, t3 +; RV64I-NEXT: srl t5, a3, t2 ; RV64I-NEXT: .LBB16_13: -; RV64I-NEXT: or a4, s0, a0 -; RV64I-NEXT: or a0, 
s1, t6 -; RV64I-NEXT: bltu a6, t0, .LBB16_15 +; RV64I-NEXT: or t0, t6, a0 +; RV64I-NEXT: or a0, s0, a7 +; RV64I-NEXT: bltu a6, t1, .LBB16_15 ; RV64I-NEXT: # %bb.14: -; RV64I-NEXT: li t6, 0 -; RV64I-NEXT: sll t2, a4, t2 +; RV64I-NEXT: li t2, 0 +; RV64I-NEXT: sll a7, t0, t3 ; RV64I-NEXT: j .LBB16_16 ; RV64I-NEXT: .LBB16_15: -; RV64I-NEXT: sll t6, a4, a6 -; RV64I-NEXT: srl t2, a4, t3 +; RV64I-NEXT: srl a7, t0, t2 ; RV64I-NEXT: sll t3, a0, a6 -; RV64I-NEXT: or t2, t2, t3 +; RV64I-NEXT: sll t2, t0, a6 +; RV64I-NEXT: or a7, a7, t3 ; RV64I-NEXT: .LBB16_16: -; RV64I-NEXT: sub s0, a6, a7 -; RV64I-NEXT: mv t3, a0 +; RV64I-NEXT: li t3, 128 +; RV64I-NEXT: sub s0, a6, t3 +; RV64I-NEXT: mv t6, a0 ; RV64I-NEXT: beqz a6, .LBB16_18 ; RV64I-NEXT: # %bb.17: -; RV64I-NEXT: mv t3, t2 +; RV64I-NEXT: mv t6, a7 ; RV64I-NEXT: .LBB16_18: -; RV64I-NEXT: bltu s0, t0, .LBB16_20 +; RV64I-NEXT: bltu s0, t1, .LBB16_20 ; RV64I-NEXT: # %bb.19: -; RV64I-NEXT: li t2, 0 -; RV64I-NEXT: sub t0, s0, t0 -; RV64I-NEXT: sll t0, t1, t0 +; RV64I-NEXT: li t3, 0 +; RV64I-NEXT: sub a7, s0, t1 +; RV64I-NEXT: sll a4, a4, a7 ; RV64I-NEXT: bnez s0, .LBB16_21 ; RV64I-NEXT: j .LBB16_22 ; RV64I-NEXT: .LBB16_20: -; RV64I-NEXT: sll t2, t1, s0 -; RV64I-NEXT: neg t0, s0 -; RV64I-NEXT: srl t0, t1, t0 -; RV64I-NEXT: sll t1, a5, s0 -; RV64I-NEXT: or t0, t0, t1 +; RV64I-NEXT: neg a7, s0 +; RV64I-NEXT: srl a7, a4, a7 +; RV64I-NEXT: sll t1, a3, s0 +; RV64I-NEXT: sll t3, a4, s0 +; RV64I-NEXT: or a4, a7, t1 ; RV64I-NEXT: beqz s0, .LBB16_22 ; RV64I-NEXT: .LBB16_21: -; RV64I-NEXT: mv a5, t0 +; RV64I-NEXT: mv a3, a4 ; RV64I-NEXT: .LBB16_22: -; RV64I-NEXT: bltu a6, a7, .LBB16_24 +; RV64I-NEXT: li a4, 128 +; RV64I-NEXT: bltu a6, a4, .LBB16_24 ; RV64I-NEXT: # %bb.23: ; RV64I-NEXT: li a1, 0 -; RV64I-NEXT: li a3, 0 +; RV64I-NEXT: li a5, 0 ; RV64I-NEXT: bnez a6, .LBB16_25 ; RV64I-NEXT: j .LBB16_26 ; RV64I-NEXT: .LBB16_24: -; RV64I-NEXT: or t2, t4, t6 -; RV64I-NEXT: or a5, t5, t3 +; RV64I-NEXT: or t3, t4, t2 +; RV64I-NEXT: or a3, t5, t6 ; 
RV64I-NEXT: beqz a6, .LBB16_26 ; RV64I-NEXT: .LBB16_25: -; RV64I-NEXT: mv a4, t2 -; RV64I-NEXT: mv a0, a5 +; RV64I-NEXT: mv t0, t3 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: .LBB16_26: -; RV64I-NEXT: srli a5, a1, 32 +; RV64I-NEXT: srli a4, a1, 32 +; RV64I-NEXT: lui a3, 16 ; RV64I-NEXT: srliw a6, a1, 16 -; RV64I-NEXT: lui t2, 16 -; RV64I-NEXT: srliw t1, a1, 24 -; RV64I-NEXT: srli t0, a1, 48 -; RV64I-NEXT: srli t5, a1, 56 -; RV64I-NEXT: srli a7, a3, 32 -; RV64I-NEXT: srliw t4, a3, 16 -; RV64I-NEXT: srliw s0, a3, 24 -; RV64I-NEXT: srli t6, a3, 48 -; RV64I-NEXT: srli s3, a3, 56 -; RV64I-NEXT: srli t3, a4, 32 -; RV64I-NEXT: srliw s2, a4, 16 -; RV64I-NEXT: srliw s6, a4, 24 -; RV64I-NEXT: srli s4, a4, 48 -; RV64I-NEXT: srli s7, a4, 56 -; RV64I-NEXT: srli s1, a0, 32 -; RV64I-NEXT: srliw s5, a0, 16 -; RV64I-NEXT: srliw s8, a0, 24 -; RV64I-NEXT: srli s9, a0, 48 -; RV64I-NEXT: srli s10, a0, 56 -; RV64I-NEXT: addi t2, t2, -1 -; RV64I-NEXT: and s11, a1, t2 -; RV64I-NEXT: srli s11, s11, 8 +; RV64I-NEXT: addi a3, a3, -1 +; RV64I-NEXT: srliw a7, a1, 24 +; RV64I-NEXT: and t1, a1, a3 +; RV64I-NEXT: srli t1, t1, 8 ; RV64I-NEXT: sb a1, 0(a2) -; RV64I-NEXT: sb s11, 1(a2) +; RV64I-NEXT: sb t1, 1(a2) ; RV64I-NEXT: sb a6, 2(a2) -; RV64I-NEXT: sb t1, 3(a2) -; RV64I-NEXT: and a1, a5, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a5, 4(a2) -; RV64I-NEXT: sb a1, 5(a2) -; RV64I-NEXT: sb t0, 6(a2) -; RV64I-NEXT: sb t5, 7(a2) -; RV64I-NEXT: and a1, a3, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a3, 8(a2) -; RV64I-NEXT: sb a1, 9(a2) -; RV64I-NEXT: sb t4, 10(a2) -; RV64I-NEXT: sb s0, 11(a2) -; RV64I-NEXT: and a1, a7, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a7, 12(a2) -; RV64I-NEXT: sb a1, 13(a2) -; RV64I-NEXT: sb t6, 14(a2) -; RV64I-NEXT: sb s3, 15(a2) -; RV64I-NEXT: and a1, a4, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a4, 16(a2) -; RV64I-NEXT: sb a1, 17(a2) -; RV64I-NEXT: sb s2, 18(a2) -; RV64I-NEXT: sb s6, 19(a2) -; RV64I-NEXT: and a1, t3, t2 -; RV64I-NEXT: srli a1, 
a1, 8 -; RV64I-NEXT: sb t3, 20(a2) -; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: sb s4, 22(a2) -; RV64I-NEXT: sb s7, 23(a2) -; RV64I-NEXT: and a1, a0, t2 -; RV64I-NEXT: and a3, s1, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: sb a7, 3(a2) +; RV64I-NEXT: and a6, a4, a3 +; RV64I-NEXT: srli a7, a1, 48 +; RV64I-NEXT: srli a6, a6, 8 +; RV64I-NEXT: srli a1, a1, 56 +; RV64I-NEXT: sb a4, 4(a2) +; RV64I-NEXT: sb a6, 5(a2) +; RV64I-NEXT: sb a7, 6(a2) +; RV64I-NEXT: sb a1, 7(a2) +; RV64I-NEXT: srli a1, a5, 32 +; RV64I-NEXT: srliw a4, a5, 16 +; RV64I-NEXT: and a6, a5, a3 +; RV64I-NEXT: srli a6, a6, 8 +; RV64I-NEXT: srliw a7, a5, 24 +; RV64I-NEXT: sb a5, 8(a2) +; RV64I-NEXT: sb a6, 9(a2) +; RV64I-NEXT: sb a4, 10(a2) +; RV64I-NEXT: sb a7, 11(a2) +; RV64I-NEXT: srli a4, a5, 48 +; RV64I-NEXT: and a6, a1, a3 +; RV64I-NEXT: srli a6, a6, 8 +; RV64I-NEXT: srli a5, a5, 56 +; RV64I-NEXT: sb a1, 12(a2) +; RV64I-NEXT: sb a6, 13(a2) +; RV64I-NEXT: sb a4, 14(a2) +; RV64I-NEXT: sb a5, 15(a2) +; RV64I-NEXT: srli a1, t0, 32 +; RV64I-NEXT: and a4, t0, a3 +; RV64I-NEXT: srliw a5, t0, 16 +; RV64I-NEXT: srli a4, a4, 8 +; RV64I-NEXT: srliw a6, t0, 24 +; RV64I-NEXT: sb t0, 16(a2) +; RV64I-NEXT: sb a4, 17(a2) +; RV64I-NEXT: sb a5, 18(a2) +; RV64I-NEXT: sb a6, 19(a2) +; RV64I-NEXT: and a4, a1, a3 +; RV64I-NEXT: srli a5, t0, 48 +; RV64I-NEXT: srli a4, a4, 8 +; RV64I-NEXT: srli a6, t0, 56 +; RV64I-NEXT: sb a1, 20(a2) +; RV64I-NEXT: sb a4, 21(a2) +; RV64I-NEXT: sb a5, 22(a2) +; RV64I-NEXT: sb a6, 23(a2) +; RV64I-NEXT: srli a1, a0, 32 +; RV64I-NEXT: srliw a4, a0, 16 +; RV64I-NEXT: and a5, a0, a3 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srliw a6, a0, 24 ; RV64I-NEXT: sb a0, 24(a2) -; RV64I-NEXT: sb a1, 25(a2) -; RV64I-NEXT: sb s5, 26(a2) -; RV64I-NEXT: sb s8, 27(a2) -; RV64I-NEXT: sb s1, 28(a2) +; RV64I-NEXT: sb a5, 25(a2) +; RV64I-NEXT: sb a4, 26(a2) +; RV64I-NEXT: sb a6, 27(a2) +; RV64I-NEXT: srli a4, a0, 48 +; RV64I-NEXT: and a3, a1, a3 +; RV64I-NEXT: srli a3, a3, 8 +; 
RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: sb a1, 28(a2) ; RV64I-NEXT: sb a3, 29(a2) -; RV64I-NEXT: sb s9, 30(a2) -; RV64I-NEXT: sb s10, 31(a2) +; RV64I-NEXT: sb a4, 30(a2) +; RV64I-NEXT: sb a0, 31(a2) ; RV64I-NEXT: ld ra, 104(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 96(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 88(sp) # 8-byte Folded Reload @@ -7568,125 +7508,125 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: sw s11, 28(sp) # 4-byte Folded Spill ; RV32I-NEXT: lbu a3, 1(a0) ; RV32I-NEXT: lbu a4, 0(a0) -; RV32I-NEXT: lbu a5, 2(a0) -; RV32I-NEXT: lbu a6, 3(a0) +; RV32I-NEXT: lbu a5, 3(a0) +; RV32I-NEXT: lbu a6, 2(a0) +; RV32I-NEXT: lbu a7, 3(a1) +; RV32I-NEXT: lbu t0, 1(a1) +; RV32I-NEXT: lbu t1, 2(a1) +; RV32I-NEXT: lbu a1, 0(a1) ; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: or a3, a3, a4 -; RV32I-NEXT: lbu a4, 0(a1) -; RV32I-NEXT: lbu a7, 1(a1) -; RV32I-NEXT: lbu t0, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a4, a5, a6 ; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a4, a7, a4 -; RV32I-NEXT: or a1, a1, t0 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a5, a7, t1 +; RV32I-NEXT: or a1, t0, a1 ; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: or a6, a5, a3 -; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: slli a4, a4, 16 +; RV32I-NEXT: or a1, a5, a1 +; RV32I-NEXT: or a6, a4, a3 ; RV32I-NEXT: slli a1, a1, 5 -; RV32I-NEXT: srli a5, a1, 5 -; RV32I-NEXT: sll t5, a6, a1 +; RV32I-NEXT: srli a4, a1, 5 +; RV32I-NEXT: sll t4, a6, a1 ; RV32I-NEXT: li s9, 1 -; RV32I-NEXT: mv a4, t5 -; RV32I-NEXT: beqz a5, .LBB16_2 +; RV32I-NEXT: mv a5, t4 +; RV32I-NEXT: beqz a4, .LBB16_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: .LBB16_2: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: li s0, 2 -; RV32I-NEXT: beq a5, s9, .LBB16_4 +; RV32I-NEXT: li t6, 2 +; 
RV32I-NEXT: beq a4, s9, .LBB16_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: mv a3, a4 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: .LBB16_4: ; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: beq a5, s0, .LBB16_6 +; RV32I-NEXT: beq a4, t6, .LBB16_6 ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: mv a7, a3 ; RV32I-NEXT: .LBB16_6: -; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: li s6, 3 -; RV32I-NEXT: li s4, 4 -; RV32I-NEXT: beq a5, s6, .LBB16_8 +; RV32I-NEXT: li s3, 4 +; RV32I-NEXT: beq a4, s6, .LBB16_8 ; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: mv a4, a7 +; RV32I-NEXT: mv a5, a7 ; RV32I-NEXT: .LBB16_8: ; RV32I-NEXT: li t0, 0 ; RV32I-NEXT: li s11, 5 -; RV32I-NEXT: beq a5, s4, .LBB16_10 +; RV32I-NEXT: beq a4, s3, .LBB16_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv t0, a4 +; RV32I-NEXT: mv t0, a5 ; RV32I-NEXT: .LBB16_10: ; RV32I-NEXT: lbu t2, 7(a0) ; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: beq a5, s11, .LBB16_12 +; RV32I-NEXT: beq a4, s11, .LBB16_12 ; RV32I-NEXT: # %bb.11: ; RV32I-NEXT: mv a7, t0 ; RV32I-NEXT: .LBB16_12: -; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: lbu t0, 5(a0) ; RV32I-NEXT: lbu t1, 6(a0) -; RV32I-NEXT: li s1, 6 +; RV32I-NEXT: li s0, 6 ; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: beq a5, s1, .LBB16_14 +; RV32I-NEXT: beq a4, s0, .LBB16_14 ; RV32I-NEXT: # %bb.13: -; RV32I-NEXT: mv a4, a7 +; RV32I-NEXT: mv a5, a7 ; RV32I-NEXT: .LBB16_14: ; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: lbu a3, 4(a0) -; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: lbu t3, 4(a0) ; RV32I-NEXT: li ra, 7 +; RV32I-NEXT: or a3, t2, t1 ; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: beq a5, ra, .LBB16_16 +; RV32I-NEXT: beq a4, ra, .LBB16_16 ; RV32I-NEXT: # %bb.15: -; RV32I-NEXT: mv a7, a4 +; RV32I-NEXT: mv a7, a5 ; RV32I-NEXT: .LBB16_16: -; RV32I-NEXT: or a3, t0, a3 -; RV32I-NEXT: slli t1, t1, 16 -; RV32I-NEXT: andi t6, a1, 31 -; RV32I-NEXT: mv a4, a6 +; RV32I-NEXT: or t1, t0, t3 +; RV32I-NEXT: slli a3, a3, 16 +; RV32I-NEXT: andi t5, a1, 31 +; RV32I-NEXT: mv a5, a6 ; RV32I-NEXT: beqz a1, .LBB16_18 ; RV32I-NEXT: 
# %bb.17: -; RV32I-NEXT: mv a4, a7 +; RV32I-NEXT: mv a5, a7 ; RV32I-NEXT: .LBB16_18: ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: or a7, t1, a3 -; RV32I-NEXT: neg s3, t6 -; RV32I-NEXT: srl s5, a6, s3 -; RV32I-NEXT: beqz t6, .LBB16_20 +; RV32I-NEXT: neg s2, t5 +; RV32I-NEXT: or a7, a3, t1 +; RV32I-NEXT: srl s4, a6, s2 +; RV32I-NEXT: beqz t5, .LBB16_20 ; RV32I-NEXT: # %bb.19: -; RV32I-NEXT: mv t0, s5 +; RV32I-NEXT: mv t0, s4 ; RV32I-NEXT: .LBB16_20: ; RV32I-NEXT: sll s7, a7, a1 -; RV32I-NEXT: beqz a5, .LBB16_22 +; RV32I-NEXT: beqz a4, .LBB16_22 ; RV32I-NEXT: # %bb.21: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: mv a6, t5 -; RV32I-NEXT: bne a5, s9, .LBB16_23 +; RV32I-NEXT: mv a6, t4 +; RV32I-NEXT: bne a4, s9, .LBB16_23 ; RV32I-NEXT: j .LBB16_24 ; RV32I-NEXT: .LBB16_22: ; RV32I-NEXT: or a3, s7, t0 -; RV32I-NEXT: mv a6, t5 -; RV32I-NEXT: beq a5, s9, .LBB16_24 +; RV32I-NEXT: mv a6, t4 +; RV32I-NEXT: beq a4, s9, .LBB16_24 ; RV32I-NEXT: .LBB16_23: ; RV32I-NEXT: mv a6, a3 ; RV32I-NEXT: .LBB16_24: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bne a5, s0, .LBB16_40 +; RV32I-NEXT: bne a4, t6, .LBB16_40 ; RV32I-NEXT: # %bb.25: ; RV32I-NEXT: li a6, 0 -; RV32I-NEXT: bne a5, s6, .LBB16_41 +; RV32I-NEXT: bne a4, s6, .LBB16_41 ; RV32I-NEXT: .LBB16_26: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beq a5, s4, .LBB16_28 +; RV32I-NEXT: beq a4, s3, .LBB16_28 ; RV32I-NEXT: .LBB16_27: ; RV32I-NEXT: mv a3, a6 ; RV32I-NEXT: .LBB16_28: ; RV32I-NEXT: lbu t2, 11(a0) ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: beq a5, s11, .LBB16_30 +; RV32I-NEXT: beq a4, s11, .LBB16_30 ; RV32I-NEXT: # %bb.29: ; RV32I-NEXT: mv t0, a3 ; RV32I-NEXT: .LBB16_30: @@ -7694,7 +7634,7 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: lbu t1, 9(a0) ; RV32I-NEXT: lbu a3, 10(a0) ; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: beq a5, s1, .LBB16_32 +; RV32I-NEXT: beq a4, s0, .LBB16_32 ; RV32I-NEXT: # %bb.31: ; RV32I-NEXT: mv a6, t0 ; RV32I-NEXT: .LBB16_32: @@ -7702,623 +7642,613 @@ define void 
@shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: lbu t3, 8(a0) ; RV32I-NEXT: or t2, t2, a3 ; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: beq a5, ra, .LBB16_34 +; RV32I-NEXT: beq a4, ra, .LBB16_34 ; RV32I-NEXT: # %bb.33: ; RV32I-NEXT: mv t0, a6 ; RV32I-NEXT: .LBB16_34: ; RV32I-NEXT: or a3, t1, t3 -; RV32I-NEXT: slli a6, t2, 16 -; RV32I-NEXT: mv t2, a7 +; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: mv s1, a7 ; RV32I-NEXT: beqz a1, .LBB16_36 ; RV32I-NEXT: # %bb.35: -; RV32I-NEXT: mv t2, t0 +; RV32I-NEXT: mv s1, t0 ; RV32I-NEXT: .LBB16_36: ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: or a6, a6, a3 -; RV32I-NEXT: srl s8, a7, s3 -; RV32I-NEXT: beqz t6, .LBB16_38 +; RV32I-NEXT: or a6, t2, a3 +; RV32I-NEXT: srl s8, a7, s2 +; RV32I-NEXT: beqz t5, .LBB16_38 ; RV32I-NEXT: # %bb.37: ; RV32I-NEXT: mv t0, s8 ; RV32I-NEXT: .LBB16_38: ; RV32I-NEXT: sll s10, a6, a1 -; RV32I-NEXT: beqz a5, .LBB16_42 +; RV32I-NEXT: beqz a4, .LBB16_42 ; RV32I-NEXT: # %bb.39: ; RV32I-NEXT: li a7, 0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB16_43 +; RV32I-NEXT: bnez t5, .LBB16_43 ; RV32I-NEXT: j .LBB16_44 ; RV32I-NEXT: .LBB16_40: ; RV32I-NEXT: mv a3, a6 ; RV32I-NEXT: li a6, 0 -; RV32I-NEXT: beq a5, s6, .LBB16_26 +; RV32I-NEXT: beq a4, s6, .LBB16_26 ; RV32I-NEXT: .LBB16_41: ; RV32I-NEXT: mv a6, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bne a5, s4, .LBB16_27 +; RV32I-NEXT: bne a4, s3, .LBB16_27 ; RV32I-NEXT: j .LBB16_28 ; RV32I-NEXT: .LBB16_42: ; RV32I-NEXT: or a7, s10, t0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB16_44 +; RV32I-NEXT: beqz t5, .LBB16_44 ; RV32I-NEXT: .LBB16_43: -; RV32I-NEXT: mv a3, s5 +; RV32I-NEXT: mv a3, s4 ; RV32I-NEXT: .LBB16_44: -; RV32I-NEXT: beq a5, s9, .LBB16_61 +; RV32I-NEXT: beq a4, s9, .LBB16_61 ; RV32I-NEXT: # %bb.45: -; RV32I-NEXT: mv a3, t5 -; RV32I-NEXT: bne a5, s0, .LBB16_62 +; RV32I-NEXT: mv a3, t4 +; RV32I-NEXT: bne a4, t6, .LBB16_62 ; RV32I-NEXT: .LBB16_46: ; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: bne a5, s6, .LBB16_63 +; 
RV32I-NEXT: bne a4, s6, .LBB16_63 ; RV32I-NEXT: .LBB16_47: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beq a5, s4, .LBB16_49 +; RV32I-NEXT: beq a4, s3, .LBB16_49 ; RV32I-NEXT: .LBB16_48: ; RV32I-NEXT: mv a3, a7 ; RV32I-NEXT: .LBB16_49: -; RV32I-NEXT: lbu t3, 15(a0) +; RV32I-NEXT: lbu t2, 15(a0) ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: beq a5, s11, .LBB16_51 +; RV32I-NEXT: beq a4, s11, .LBB16_51 ; RV32I-NEXT: # %bb.50: ; RV32I-NEXT: mv t0, a3 ; RV32I-NEXT: .LBB16_51: ; RV32I-NEXT: li a7, 0 ; RV32I-NEXT: lbu t1, 13(a0) ; RV32I-NEXT: lbu a3, 14(a0) -; RV32I-NEXT: slli t3, t3, 8 -; RV32I-NEXT: beq a5, s1, .LBB16_53 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: beq a4, s0, .LBB16_53 ; RV32I-NEXT: # %bb.52: ; RV32I-NEXT: mv a7, t0 ; RV32I-NEXT: .LBB16_53: ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: lbu t4, 12(a0) -; RV32I-NEXT: or t3, t3, a3 +; RV32I-NEXT: lbu t3, 12(a0) +; RV32I-NEXT: or t2, t2, a3 ; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: beq a5, ra, .LBB16_55 +; RV32I-NEXT: beq a4, ra, .LBB16_55 ; RV32I-NEXT: # %bb.54: ; RV32I-NEXT: mv t0, a7 ; RV32I-NEXT: .LBB16_55: -; RV32I-NEXT: or a3, t1, t4 -; RV32I-NEXT: slli t3, t3, 16 -; RV32I-NEXT: mv s2, a6 +; RV32I-NEXT: or a3, t1, t3 +; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: mv s5, a6 ; RV32I-NEXT: beqz a1, .LBB16_57 ; RV32I-NEXT: # %bb.56: -; RV32I-NEXT: mv s2, t0 +; RV32I-NEXT: mv s5, t0 ; RV32I-NEXT: .LBB16_57: ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: or a7, t3, a3 -; RV32I-NEXT: srl a3, a6, s3 +; RV32I-NEXT: or a7, t2, a3 +; RV32I-NEXT: srl a3, a6, s2 ; RV32I-NEXT: sw a3, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: beqz t6, .LBB16_59 +; RV32I-NEXT: beqz t5, .LBB16_59 ; RV32I-NEXT: # %bb.58: ; RV32I-NEXT: lw t0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB16_59: ; RV32I-NEXT: sll a3, a7, a1 ; RV32I-NEXT: sw a3, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: beqz a5, .LBB16_64 +; RV32I-NEXT: beqz a4, .LBB16_64 ; RV32I-NEXT: # %bb.60: ; RV32I-NEXT: li a6, 0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB16_65 +; RV32I-NEXT: bnez 
t5, .LBB16_65 ; RV32I-NEXT: j .LBB16_66 ; RV32I-NEXT: .LBB16_61: ; RV32I-NEXT: or a7, s7, a3 -; RV32I-NEXT: mv a3, t5 -; RV32I-NEXT: beq a5, s0, .LBB16_46 +; RV32I-NEXT: mv a3, t4 +; RV32I-NEXT: beq a4, t6, .LBB16_46 ; RV32I-NEXT: .LBB16_62: ; RV32I-NEXT: mv a3, a7 ; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: beq a5, s6, .LBB16_47 +; RV32I-NEXT: beq a4, s6, .LBB16_47 ; RV32I-NEXT: .LBB16_63: ; RV32I-NEXT: mv a7, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bne a5, s4, .LBB16_48 +; RV32I-NEXT: bne a4, s3, .LBB16_48 ; RV32I-NEXT: j .LBB16_49 ; RV32I-NEXT: .LBB16_64: ; RV32I-NEXT: or a6, a3, t0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB16_66 +; RV32I-NEXT: beqz t5, .LBB16_66 ; RV32I-NEXT: .LBB16_65: ; RV32I-NEXT: mv a3, s8 ; RV32I-NEXT: .LBB16_66: -; RV32I-NEXT: beq a5, s9, .LBB16_84 +; RV32I-NEXT: beq a4, s9, .LBB16_84 ; RV32I-NEXT: # %bb.67: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB16_85 +; RV32I-NEXT: bnez t5, .LBB16_85 ; RV32I-NEXT: .LBB16_68: -; RV32I-NEXT: beq a5, s0, .LBB16_86 +; RV32I-NEXT: beq a4, t6, .LBB16_86 ; RV32I-NEXT: .LBB16_69: -; RV32I-NEXT: mv t0, t5 -; RV32I-NEXT: bne a5, s6, .LBB16_87 +; RV32I-NEXT: mv t0, t4 +; RV32I-NEXT: bne a4, s6, .LBB16_87 ; RV32I-NEXT: .LBB16_70: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beq a5, s4, .LBB16_72 +; RV32I-NEXT: beq a4, s3, .LBB16_72 ; RV32I-NEXT: .LBB16_71: ; RV32I-NEXT: mv a3, t0 ; RV32I-NEXT: .LBB16_72: -; RV32I-NEXT: lbu t3, 19(a0) +; RV32I-NEXT: lbu t2, 19(a0) ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: beq a5, s11, .LBB16_74 +; RV32I-NEXT: beq a4, s11, .LBB16_74 ; RV32I-NEXT: # %bb.73: ; RV32I-NEXT: mv t0, a3 ; RV32I-NEXT: .LBB16_74: ; RV32I-NEXT: li a6, 0 ; RV32I-NEXT: lbu t1, 17(a0) ; RV32I-NEXT: lbu a3, 18(a0) -; RV32I-NEXT: slli t3, t3, 8 -; RV32I-NEXT: beq a5, s1, .LBB16_76 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: beq a4, s0, .LBB16_76 ; RV32I-NEXT: # %bb.75: ; RV32I-NEXT: mv a6, t0 ; RV32I-NEXT: .LBB16_76: ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: lbu t4, 16(a0) -; RV32I-NEXT: or t3, t3, a3 +; 
RV32I-NEXT: lbu t3, 16(a0) +; RV32I-NEXT: or t2, t2, a3 ; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: beq a5, ra, .LBB16_78 +; RV32I-NEXT: beq a4, ra, .LBB16_78 ; RV32I-NEXT: # %bb.77: ; RV32I-NEXT: mv t0, a6 ; RV32I-NEXT: .LBB16_78: -; RV32I-NEXT: or a3, t1, t4 -; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: or a3, t1, t3 +; RV32I-NEXT: slli t2, t2, 16 ; RV32I-NEXT: mv s6, a7 ; RV32I-NEXT: beqz a1, .LBB16_80 ; RV32I-NEXT: # %bb.79: ; RV32I-NEXT: mv s6, t0 ; RV32I-NEXT: .LBB16_80: ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: or a6, t3, a3 -; RV32I-NEXT: srl a3, a7, s3 +; RV32I-NEXT: or a6, t2, a3 +; RV32I-NEXT: srl a3, a7, s2 ; RV32I-NEXT: sw a3, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: beqz t6, .LBB16_82 +; RV32I-NEXT: beqz t5, .LBB16_82 ; RV32I-NEXT: # %bb.81: ; RV32I-NEXT: lw t0, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB16_82: ; RV32I-NEXT: sll a3, a6, a1 ; RV32I-NEXT: sw a3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: beqz a5, .LBB16_88 +; RV32I-NEXT: beqz a4, .LBB16_88 ; RV32I-NEXT: # %bb.83: ; RV32I-NEXT: li a7, 0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB16_89 +; RV32I-NEXT: bnez t5, .LBB16_89 ; RV32I-NEXT: j .LBB16_90 ; RV32I-NEXT: .LBB16_84: ; RV32I-NEXT: or a6, s10, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB16_68 +; RV32I-NEXT: beqz t5, .LBB16_68 ; RV32I-NEXT: .LBB16_85: -; RV32I-NEXT: mv a3, s5 -; RV32I-NEXT: bne a5, s0, .LBB16_69 +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: bne a4, t6, .LBB16_69 ; RV32I-NEXT: .LBB16_86: ; RV32I-NEXT: or a6, s7, a3 -; RV32I-NEXT: mv t0, t5 -; RV32I-NEXT: beq a5, s6, .LBB16_70 +; RV32I-NEXT: mv t0, t4 +; RV32I-NEXT: beq a4, s6, .LBB16_70 ; RV32I-NEXT: .LBB16_87: ; RV32I-NEXT: mv t0, a6 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bne a5, s4, .LBB16_71 +; RV32I-NEXT: bne a4, s3, .LBB16_71 ; RV32I-NEXT: j .LBB16_72 ; RV32I-NEXT: .LBB16_88: ; RV32I-NEXT: or a7, a3, t0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB16_90 +; RV32I-NEXT: beqz t5, .LBB16_90 ; RV32I-NEXT: .LBB16_89: ; RV32I-NEXT: lw a3, 24(sp) # 
4-byte Folded Reload ; RV32I-NEXT: .LBB16_90: -; RV32I-NEXT: beq a5, s9, .LBB16_109 +; RV32I-NEXT: beq a4, s9, .LBB16_109 ; RV32I-NEXT: # %bb.91: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB16_110 +; RV32I-NEXT: bnez t5, .LBB16_110 ; RV32I-NEXT: .LBB16_92: -; RV32I-NEXT: beq a5, s0, .LBB16_111 +; RV32I-NEXT: beq a4, t6, .LBB16_111 ; RV32I-NEXT: .LBB16_93: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB16_112 +; RV32I-NEXT: bnez t5, .LBB16_112 ; RV32I-NEXT: .LBB16_94: ; RV32I-NEXT: li t0, 3 -; RV32I-NEXT: beq a5, t0, .LBB16_113 +; RV32I-NEXT: beq a4, t0, .LBB16_113 ; RV32I-NEXT: .LBB16_95: -; RV32I-NEXT: mv a3, t5 -; RV32I-NEXT: beq a5, s4, .LBB16_97 +; RV32I-NEXT: mv a3, t4 +; RV32I-NEXT: beq a4, s3, .LBB16_97 ; RV32I-NEXT: .LBB16_96: ; RV32I-NEXT: mv a3, a7 ; RV32I-NEXT: .LBB16_97: -; RV32I-NEXT: lbu t3, 23(a0) +; RV32I-NEXT: lbu t2, 23(a0) ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: beq a5, s11, .LBB16_99 +; RV32I-NEXT: beq a4, s11, .LBB16_99 ; RV32I-NEXT: # %bb.98: ; RV32I-NEXT: mv t0, a3 ; RV32I-NEXT: .LBB16_99: ; RV32I-NEXT: li a7, 0 ; RV32I-NEXT: lbu t1, 21(a0) ; RV32I-NEXT: lbu a3, 22(a0) -; RV32I-NEXT: slli t3, t3, 8 -; RV32I-NEXT: beq a5, s1, .LBB16_101 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: beq a4, s0, .LBB16_101 ; RV32I-NEXT: # %bb.100: ; RV32I-NEXT: mv a7, t0 ; RV32I-NEXT: .LBB16_101: ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: lbu t4, 20(a0) -; RV32I-NEXT: or t3, t3, a3 +; RV32I-NEXT: lbu t3, 20(a0) +; RV32I-NEXT: or t2, t2, a3 ; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: beq a5, ra, .LBB16_103 +; RV32I-NEXT: beq a4, ra, .LBB16_103 ; RV32I-NEXT: # %bb.102: ; RV32I-NEXT: mv t0, a7 ; RV32I-NEXT: .LBB16_103: -; RV32I-NEXT: or a3, t1, t4 -; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: or a3, t1, t3 +; RV32I-NEXT: slli t2, t2, 16 ; RV32I-NEXT: mv s9, a6 ; RV32I-NEXT: beqz a1, .LBB16_105 ; RV32I-NEXT: # %bb.104: ; RV32I-NEXT: mv s9, t0 ; RV32I-NEXT: .LBB16_105: ; RV32I-NEXT: li t1, 0 -; RV32I-NEXT: or t0, t3, a3 -; RV32I-NEXT: srl a6, a6, s3 -; RV32I-NEXT: 
beqz t6, .LBB16_107 +; RV32I-NEXT: or t0, t2, a3 +; RV32I-NEXT: srl a6, a6, s2 +; RV32I-NEXT: beqz t5, .LBB16_107 ; RV32I-NEXT: # %bb.106: ; RV32I-NEXT: mv t1, a6 ; RV32I-NEXT: .LBB16_107: ; RV32I-NEXT: sll a7, t0, a1 -; RV32I-NEXT: beqz a5, .LBB16_114 +; RV32I-NEXT: beqz a4, .LBB16_114 ; RV32I-NEXT: # %bb.108: ; RV32I-NEXT: li t1, 0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB16_115 +; RV32I-NEXT: bnez t5, .LBB16_115 ; RV32I-NEXT: j .LBB16_116 ; RV32I-NEXT: .LBB16_109: ; RV32I-NEXT: lw a7, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: or a7, a7, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB16_92 +; RV32I-NEXT: beqz t5, .LBB16_92 ; RV32I-NEXT: .LBB16_110: ; RV32I-NEXT: mv a3, s8 -; RV32I-NEXT: bne a5, s0, .LBB16_93 +; RV32I-NEXT: bne a4, t6, .LBB16_93 ; RV32I-NEXT: .LBB16_111: ; RV32I-NEXT: or a7, s10, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB16_94 +; RV32I-NEXT: beqz t5, .LBB16_94 ; RV32I-NEXT: .LBB16_112: -; RV32I-NEXT: mv a3, s5 +; RV32I-NEXT: mv a3, s4 ; RV32I-NEXT: li t0, 3 -; RV32I-NEXT: bne a5, t0, .LBB16_95 +; RV32I-NEXT: bne a4, t0, .LBB16_95 ; RV32I-NEXT: .LBB16_113: ; RV32I-NEXT: or a7, s7, a3 -; RV32I-NEXT: mv a3, t5 -; RV32I-NEXT: bne a5, s4, .LBB16_96 +; RV32I-NEXT: mv a3, t4 +; RV32I-NEXT: bne a4, s3, .LBB16_96 ; RV32I-NEXT: j .LBB16_97 ; RV32I-NEXT: .LBB16_114: ; RV32I-NEXT: or t1, a7, t1 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB16_116 +; RV32I-NEXT: beqz t5, .LBB16_116 ; RV32I-NEXT: .LBB16_115: ; RV32I-NEXT: lw a3, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB16_116: -; RV32I-NEXT: li t3, 1 -; RV32I-NEXT: beq a5, t3, .LBB16_136 +; RV32I-NEXT: li t2, 1 +; RV32I-NEXT: beq a4, t2, .LBB16_136 ; RV32I-NEXT: # %bb.117: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB16_137 +; RV32I-NEXT: bnez t5, .LBB16_137 ; RV32I-NEXT: .LBB16_118: -; RV32I-NEXT: beq a5, s0, .LBB16_138 +; RV32I-NEXT: beq a4, t6, .LBB16_138 ; RV32I-NEXT: .LBB16_119: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB16_139 +; RV32I-NEXT: bnez 
t5, .LBB16_139 ; RV32I-NEXT: .LBB16_120: -; RV32I-NEXT: li t3, 3 -; RV32I-NEXT: beq a5, t3, .LBB16_140 +; RV32I-NEXT: li t2, 3 +; RV32I-NEXT: beq a4, t2, .LBB16_140 ; RV32I-NEXT: .LBB16_121: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB16_141 +; RV32I-NEXT: bnez t5, .LBB16_141 ; RV32I-NEXT: .LBB16_122: -; RV32I-NEXT: bne a5, s4, .LBB16_124 +; RV32I-NEXT: bne a4, s3, .LBB16_124 ; RV32I-NEXT: .LBB16_123: ; RV32I-NEXT: or t1, s7, a3 ; RV32I-NEXT: .LBB16_124: -; RV32I-NEXT: sw t2, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu s0, 27(a0) -; RV32I-NEXT: mv t3, t5 -; RV32I-NEXT: beq a5, s11, .LBB16_126 +; RV32I-NEXT: lbu t6, 27(a0) +; RV32I-NEXT: mv t2, t4 +; RV32I-NEXT: beq a4, s11, .LBB16_126 ; RV32I-NEXT: # %bb.125: -; RV32I-NEXT: mv t3, t1 +; RV32I-NEXT: mv t2, t1 ; RV32I-NEXT: .LBB16_126: -; RV32I-NEXT: mv t2, s10 +; RV32I-NEXT: sw s10, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: li t1, 0 -; RV32I-NEXT: lbu t4, 25(a0) +; RV32I-NEXT: lbu t3, 25(a0) ; RV32I-NEXT: lbu a3, 26(a0) -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: beq a5, s1, .LBB16_128 +; RV32I-NEXT: slli t6, t6, 8 +; RV32I-NEXT: beq a4, s0, .LBB16_128 ; RV32I-NEXT: # %bb.127: -; RV32I-NEXT: mv t1, t3 +; RV32I-NEXT: mv t1, t2 ; RV32I-NEXT: .LBB16_128: -; RV32I-NEXT: mv s10, s8 -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: lbu s1, 24(a0) -; RV32I-NEXT: or s0, s0, a3 -; RV32I-NEXT: slli a3, t4, 8 -; RV32I-NEXT: beq a5, ra, .LBB16_130 +; RV32I-NEXT: mv s10, a5 +; RV32I-NEXT: li t2, 0 +; RV32I-NEXT: lbu s0, 24(a0) +; RV32I-NEXT: or t6, t6, a3 +; RV32I-NEXT: slli a3, t3, 8 +; RV32I-NEXT: beq a4, ra, .LBB16_130 ; RV32I-NEXT: # %bb.129: -; RV32I-NEXT: mv t3, t1 +; RV32I-NEXT: mv t2, t1 ; RV32I-NEXT: .LBB16_130: -; RV32I-NEXT: li s8, 4 -; RV32I-NEXT: or a3, a3, s1 -; RV32I-NEXT: slli s0, s0, 16 +; RV32I-NEXT: mv a5, s8 +; RV32I-NEXT: or a3, a3, s0 +; RV32I-NEXT: slli t6, t6, 16 ; RV32I-NEXT: mv ra, t0 ; RV32I-NEXT: beqz a1, .LBB16_132 ; RV32I-NEXT: # %bb.131: -; RV32I-NEXT: mv ra, t3 +; RV32I-NEXT: mv ra, t2 ; 
RV32I-NEXT: .LBB16_132: -; RV32I-NEXT: li s4, 5 -; RV32I-NEXT: li t4, 0 -; RV32I-NEXT: or t3, s0, a3 -; RV32I-NEXT: srl t0, t0, s3 -; RV32I-NEXT: li s1, 6 -; RV32I-NEXT: beqz t6, .LBB16_134 +; RV32I-NEXT: li s8, 4 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: or t2, t6, a3 +; RV32I-NEXT: srl t0, t0, s2 +; RV32I-NEXT: li s0, 6 +; RV32I-NEXT: beqz t5, .LBB16_134 ; RV32I-NEXT: # %bb.133: -; RV32I-NEXT: mv t4, t0 +; RV32I-NEXT: mv t3, t0 ; RV32I-NEXT: .LBB16_134: -; RV32I-NEXT: mv s11, a4 -; RV32I-NEXT: sll t1, t3, a1 -; RV32I-NEXT: li s0, 2 -; RV32I-NEXT: mv a4, s7 -; RV32I-NEXT: beqz a5, .LBB16_142 +; RV32I-NEXT: li s3, 5 +; RV32I-NEXT: sll t1, t2, a1 +; RV32I-NEXT: li t6, 2 +; RV32I-NEXT: mv s11, s7 +; RV32I-NEXT: beqz a4, .LBB16_142 ; RV32I-NEXT: # %bb.135: -; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: li t3, 0 ; RV32I-NEXT: j .LBB16_143 ; RV32I-NEXT: .LBB16_136: ; RV32I-NEXT: lw t1, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: or t1, t1, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB16_118 +; RV32I-NEXT: beqz t5, .LBB16_118 ; RV32I-NEXT: .LBB16_137: ; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne a5, s0, .LBB16_119 +; RV32I-NEXT: bne a4, t6, .LBB16_119 ; RV32I-NEXT: .LBB16_138: ; RV32I-NEXT: lw t1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: or t1, t1, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB16_120 +; RV32I-NEXT: beqz t5, .LBB16_120 ; RV32I-NEXT: .LBB16_139: ; RV32I-NEXT: mv a3, s8 -; RV32I-NEXT: li t3, 3 -; RV32I-NEXT: bne a5, t3, .LBB16_121 +; RV32I-NEXT: li t2, 3 +; RV32I-NEXT: bne a4, t2, .LBB16_121 ; RV32I-NEXT: .LBB16_140: ; RV32I-NEXT: or t1, s10, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB16_122 +; RV32I-NEXT: beqz t5, .LBB16_122 ; RV32I-NEXT: .LBB16_141: -; RV32I-NEXT: mv a3, s5 -; RV32I-NEXT: beq a5, s4, .LBB16_123 +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: beq a4, s3, .LBB16_123 ; RV32I-NEXT: j .LBB16_124 ; RV32I-NEXT: .LBB16_142: -; RV32I-NEXT: or t4, t1, t4 +; RV32I-NEXT: or t3, t1, t3 ; RV32I-NEXT: .LBB16_143: -; 
RV32I-NEXT: mv s7, s5 +; RV32I-NEXT: mv s7, s4 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB16_145 +; RV32I-NEXT: beqz t5, .LBB16_145 ; RV32I-NEXT: # %bb.144: ; RV32I-NEXT: mv a3, a6 ; RV32I-NEXT: .LBB16_145: -; RV32I-NEXT: li s5, 1 -; RV32I-NEXT: bne a5, s5, .LBB16_147 +; RV32I-NEXT: li s4, 1 +; RV32I-NEXT: bne a4, s4, .LBB16_147 ; RV32I-NEXT: # %bb.146: -; RV32I-NEXT: or t4, a7, a3 +; RV32I-NEXT: or t3, a7, a3 ; RV32I-NEXT: .LBB16_147: -; RV32I-NEXT: mv s5, s7 +; RV32I-NEXT: mv s4, s7 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB16_166 +; RV32I-NEXT: bnez t5, .LBB16_166 ; RV32I-NEXT: # %bb.148: -; RV32I-NEXT: beq a5, s0, .LBB16_167 +; RV32I-NEXT: beq a4, t6, .LBB16_167 ; RV32I-NEXT: .LBB16_149: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB16_168 +; RV32I-NEXT: bnez t5, .LBB16_168 ; RV32I-NEXT: .LBB16_150: -; RV32I-NEXT: li s0, 3 -; RV32I-NEXT: beq a5, s0, .LBB16_169 +; RV32I-NEXT: li t6, 3 +; RV32I-NEXT: beq a4, t6, .LBB16_169 ; RV32I-NEXT: .LBB16_151: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB16_170 +; RV32I-NEXT: bnez t5, .LBB16_170 ; RV32I-NEXT: .LBB16_152: -; RV32I-NEXT: beq a5, s8, .LBB16_171 +; RV32I-NEXT: beq a4, s8, .LBB16_171 ; RV32I-NEXT: .LBB16_153: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB16_172 +; RV32I-NEXT: bnez t5, .LBB16_172 ; RV32I-NEXT: .LBB16_154: ; RV32I-NEXT: li s8, 2 -; RV32I-NEXT: bne a5, s4, .LBB16_156 +; RV32I-NEXT: bne a4, s3, .LBB16_156 ; RV32I-NEXT: .LBB16_155: -; RV32I-NEXT: or t4, a4, a3 +; RV32I-NEXT: or t3, s11, a3 ; RV32I-NEXT: .LBB16_156: -; RV32I-NEXT: lbu s0, 31(a0) -; RV32I-NEXT: mv a3, t5 -; RV32I-NEXT: beq a5, s1, .LBB16_158 -; RV32I-NEXT: # %bb.157: +; RV32I-NEXT: lbu t6, 31(a0) ; RV32I-NEXT: mv a3, t4 +; RV32I-NEXT: beq a4, s0, .LBB16_158 +; RV32I-NEXT: # %bb.157: +; RV32I-NEXT: mv a3, t3 ; RV32I-NEXT: .LBB16_158: -; RV32I-NEXT: li t4, 0 -; RV32I-NEXT: lbu s5, 29(a0) -; RV32I-NEXT: lbu s1, 30(a0) -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: li s4, 7 -; RV32I-NEXT: beq a5, s4, 
.LBB16_160 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: lbu s4, 29(a0) +; RV32I-NEXT: lbu s0, 30(a0) +; RV32I-NEXT: slli t6, t6, 8 +; RV32I-NEXT: li s3, 7 +; RV32I-NEXT: beq a4, s3, .LBB16_160 ; RV32I-NEXT: # %bb.159: -; RV32I-NEXT: mv t4, a3 +; RV32I-NEXT: mv t3, a3 ; RV32I-NEXT: .LBB16_160: ; RV32I-NEXT: lbu a3, 28(a0) -; RV32I-NEXT: slli s5, s5, 8 -; RV32I-NEXT: or s0, s0, s1 -; RV32I-NEXT: mv a0, t3 +; RV32I-NEXT: slli s4, s4, 8 +; RV32I-NEXT: or t6, t6, s0 +; RV32I-NEXT: mv a0, t2 ; RV32I-NEXT: beqz a1, .LBB16_162 ; RV32I-NEXT: # %bb.161: -; RV32I-NEXT: mv a0, t4 +; RV32I-NEXT: mv a0, t3 ; RV32I-NEXT: .LBB16_162: -; RV32I-NEXT: li t4, 0 -; RV32I-NEXT: or a3, s5, a3 -; RV32I-NEXT: slli s0, s0, 16 -; RV32I-NEXT: li s1, 6 -; RV32I-NEXT: li s4, 4 -; RV32I-NEXT: beqz t6, .LBB16_164 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: or a3, s4, a3 +; RV32I-NEXT: slli t6, t6, 16 +; RV32I-NEXT: li s0, 6 +; RV32I-NEXT: li s3, 4 +; RV32I-NEXT: beqz t5, .LBB16_164 ; RV32I-NEXT: # %bb.163: -; RV32I-NEXT: srl t4, t3, s3 +; RV32I-NEXT: srl t3, t2, s2 ; RV32I-NEXT: .LBB16_164: -; RV32I-NEXT: or s3, s0, a3 -; RV32I-NEXT: li s0, 5 -; RV32I-NEXT: beqz a5, .LBB16_173 +; RV32I-NEXT: or s2, t6, a3 +; RV32I-NEXT: li t6, 5 +; RV32I-NEXT: beqz a4, .LBB16_173 ; RV32I-NEXT: # %bb.165: -; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: li t2, 0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB16_174 +; RV32I-NEXT: bnez t5, .LBB16_174 ; RV32I-NEXT: j .LBB16_175 ; RV32I-NEXT: .LBB16_166: ; RV32I-NEXT: lw a3, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne a5, s0, .LBB16_149 +; RV32I-NEXT: bne a4, t6, .LBB16_149 ; RV32I-NEXT: .LBB16_167: -; RV32I-NEXT: lw t4, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t4, t4, a3 +; RV32I-NEXT: lw t3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t3, t3, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB16_150 +; RV32I-NEXT: beqz t5, .LBB16_150 ; RV32I-NEXT: .LBB16_168: ; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: li s0, 3 -; RV32I-NEXT: bne a5, s0, 
.LBB16_151 +; RV32I-NEXT: li t6, 3 +; RV32I-NEXT: bne a4, t6, .LBB16_151 ; RV32I-NEXT: .LBB16_169: -; RV32I-NEXT: lw t4, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t4, t4, a3 +; RV32I-NEXT: lw t3, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t3, t3, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB16_152 +; RV32I-NEXT: beqz t5, .LBB16_152 ; RV32I-NEXT: .LBB16_170: -; RV32I-NEXT: mv a3, s10 -; RV32I-NEXT: bne a5, s8, .LBB16_153 +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: bne a4, s8, .LBB16_153 ; RV32I-NEXT: .LBB16_171: -; RV32I-NEXT: or t4, t2, a3 +; RV32I-NEXT: lw t3, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t3, t3, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB16_154 +; RV32I-NEXT: beqz t5, .LBB16_154 ; RV32I-NEXT: .LBB16_172: -; RV32I-NEXT: mv a3, s5 +; RV32I-NEXT: mv a3, s4 ; RV32I-NEXT: li s8, 2 -; RV32I-NEXT: beq a5, s4, .LBB16_155 +; RV32I-NEXT: beq a4, s3, .LBB16_155 ; RV32I-NEXT: j .LBB16_156 ; RV32I-NEXT: .LBB16_173: -; RV32I-NEXT: sll a3, s3, a1 -; RV32I-NEXT: or t3, a3, t4 +; RV32I-NEXT: sll a3, s2, a1 +; RV32I-NEXT: or t2, a3, t3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB16_175 +; RV32I-NEXT: beqz t5, .LBB16_175 ; RV32I-NEXT: .LBB16_174: ; RV32I-NEXT: mv a3, t0 ; RV32I-NEXT: .LBB16_175: ; RV32I-NEXT: li t0, 1 -; RV32I-NEXT: beq a5, t0, .LBB16_195 +; RV32I-NEXT: beq a4, t0, .LBB16_191 ; RV32I-NEXT: # %bb.176: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB16_196 +; RV32I-NEXT: bnez t5, .LBB16_192 ; RV32I-NEXT: .LBB16_177: -; RV32I-NEXT: bne a5, s8, .LBB16_179 +; RV32I-NEXT: bne a4, s8, .LBB16_179 ; RV32I-NEXT: .LBB16_178: -; RV32I-NEXT: or t3, a7, a3 +; RV32I-NEXT: or t2, a7, a3 ; RV32I-NEXT: .LBB16_179: ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a6, 3 -; RV32I-NEXT: bnez t6, .LBB16_197 +; RV32I-NEXT: bnez t5, .LBB16_193 ; RV32I-NEXT: # %bb.180: -; RV32I-NEXT: beq a5, a6, .LBB16_198 +; RV32I-NEXT: beq a4, a6, .LBB16_194 ; RV32I-NEXT: .LBB16_181: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB16_199 +; RV32I-NEXT: bnez 
t5, .LBB16_195 ; RV32I-NEXT: .LBB16_182: -; RV32I-NEXT: beq a5, s4, .LBB16_200 +; RV32I-NEXT: beq a4, s3, .LBB16_196 ; RV32I-NEXT: .LBB16_183: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB16_201 +; RV32I-NEXT: bnez t5, .LBB16_197 ; RV32I-NEXT: .LBB16_184: -; RV32I-NEXT: bne a5, s0, .LBB16_186 +; RV32I-NEXT: beq a4, t6, .LBB16_198 ; RV32I-NEXT: .LBB16_185: -; RV32I-NEXT: or t3, t2, a3 -; RV32I-NEXT: .LBB16_186: -; RV32I-NEXT: lw t2, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB16_188 -; RV32I-NEXT: # %bb.187: -; RV32I-NEXT: mv a3, s7 +; RV32I-NEXT: bnez t5, .LBB16_199 +; RV32I-NEXT: .LBB16_186: +; RV32I-NEXT: beq a4, s0, .LBB16_200 +; RV32I-NEXT: .LBB16_187: +; RV32I-NEXT: li a3, 7 +; RV32I-NEXT: bne a4, a3, .LBB16_201 ; RV32I-NEXT: .LBB16_188: -; RV32I-NEXT: bne a5, s1, .LBB16_190 -; RV32I-NEXT: # %bb.189: -; RV32I-NEXT: or t3, a4, a3 +; RV32I-NEXT: beqz a1, .LBB16_190 +; RV32I-NEXT: .LBB16_189: +; RV32I-NEXT: mv s2, t4 ; RV32I-NEXT: .LBB16_190: -; RV32I-NEXT: mv a4, s11 -; RV32I-NEXT: li a3, 7 -; RV32I-NEXT: beq a5, a3, .LBB16_192 -; RV32I-NEXT: # %bb.191: -; RV32I-NEXT: mv t5, t3 -; RV32I-NEXT: .LBB16_192: -; RV32I-NEXT: beqz a1, .LBB16_194 -; RV32I-NEXT: # %bb.193: -; RV32I-NEXT: mv s3, t5 -; RV32I-NEXT: .LBB16_194: -; RV32I-NEXT: srli a1, a4, 16 -; RV32I-NEXT: lui a7, 16 -; RV32I-NEXT: srli a6, a4, 24 -; RV32I-NEXT: srli a3, t2, 16 -; RV32I-NEXT: srli t1, t2, 24 -; RV32I-NEXT: srli a5, s2, 16 -; RV32I-NEXT: srli t5, s2, 24 -; RV32I-NEXT: srli t0, s6, 16 -; RV32I-NEXT: srli t6, s6, 24 -; RV32I-NEXT: srli t4, s9, 16 -; RV32I-NEXT: srli s4, s9, 24 -; RV32I-NEXT: srli t3, ra, 16 -; RV32I-NEXT: srli s1, ra, 24 -; RV32I-NEXT: srli s0, a0, 16 -; RV32I-NEXT: srli s5, a0, 24 -; RV32I-NEXT: srli s7, s3, 16 -; RV32I-NEXT: srli s8, s3, 24 -; RV32I-NEXT: addi a7, a7, -1 -; RV32I-NEXT: and s10, a4, a7 -; RV32I-NEXT: and s11, t2, a7 -; RV32I-NEXT: srli s10, s10, 8 -; RV32I-NEXT: sb a4, 0(a2) -; RV32I-NEXT: sb s10, 1(a2) -; RV32I-NEXT: 
sb a1, 2(a2) +; RV32I-NEXT: lui a1, 16 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: srli a3, s10, 16 +; RV32I-NEXT: and a4, s10, a1 +; RV32I-NEXT: srli a6, s10, 24 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: sb s10, 0(a2) +; RV32I-NEXT: sb a4, 1(a2) +; RV32I-NEXT: sb a3, 2(a2) ; RV32I-NEXT: sb a6, 3(a2) -; RV32I-NEXT: and a1, s2, a7 -; RV32I-NEXT: srli a4, s11, 8 -; RV32I-NEXT: sb t2, 4(a2) +; RV32I-NEXT: srli a3, s1, 16 +; RV32I-NEXT: and a4, s1, a1 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a5, s1, 24 +; RV32I-NEXT: sb s1, 4(a2) ; RV32I-NEXT: sb a4, 5(a2) ; RV32I-NEXT: sb a3, 6(a2) -; RV32I-NEXT: sb t1, 7(a2) -; RV32I-NEXT: and a3, s6, a7 -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb s2, 8(a2) -; RV32I-NEXT: sb a1, 9(a2) -; RV32I-NEXT: sb a5, 10(a2) -; RV32I-NEXT: sb t5, 11(a2) -; RV32I-NEXT: and a1, s9, a7 -; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: sb a5, 7(a2) +; RV32I-NEXT: srli a3, s5, 16 +; RV32I-NEXT: and a4, s5, a1 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a5, s5, 24 +; RV32I-NEXT: sb s5, 8(a2) +; RV32I-NEXT: sb a4, 9(a2) +; RV32I-NEXT: sb a3, 10(a2) +; RV32I-NEXT: sb a5, 11(a2) +; RV32I-NEXT: srli a3, s6, 16 +; RV32I-NEXT: and a4, s6, a1 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a5, s6, 24 ; RV32I-NEXT: sb s6, 12(a2) -; RV32I-NEXT: sb a3, 13(a2) -; RV32I-NEXT: sb t0, 14(a2) -; RV32I-NEXT: sb t6, 15(a2) -; RV32I-NEXT: and a3, ra, a7 -; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a4, 13(a2) +; RV32I-NEXT: sb a3, 14(a2) +; RV32I-NEXT: sb a5, 15(a2) +; RV32I-NEXT: srli a3, s9, 16 +; RV32I-NEXT: and a4, s9, a1 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a5, s9, 24 ; RV32I-NEXT: sb s9, 16(a2) -; RV32I-NEXT: sb a1, 17(a2) -; RV32I-NEXT: sb t4, 18(a2) -; RV32I-NEXT: sb s4, 19(a2) -; RV32I-NEXT: and a1, a0, a7 -; RV32I-NEXT: and a4, s3, a7 -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a4, 17(a2) +; RV32I-NEXT: sb a3, 18(a2) +; RV32I-NEXT: sb a5, 19(a2) +; RV32I-NEXT: srli a3, ra, 16 +; RV32I-NEXT: 
and a4, ra, a1 ; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a5, ra, 24 ; RV32I-NEXT: sb ra, 20(a2) -; RV32I-NEXT: sb a3, 21(a2) -; RV32I-NEXT: sb t3, 22(a2) -; RV32I-NEXT: sb s1, 23(a2) +; RV32I-NEXT: sb a4, 21(a2) +; RV32I-NEXT: sb a3, 22(a2) +; RV32I-NEXT: sb a5, 23(a2) +; RV32I-NEXT: srli a3, a0, 16 +; RV32I-NEXT: and a4, a0, a1 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a5, a0, 24 ; RV32I-NEXT: sb a0, 24(a2) -; RV32I-NEXT: sb a1, 25(a2) -; RV32I-NEXT: sb s0, 26(a2) -; RV32I-NEXT: sb s5, 27(a2) -; RV32I-NEXT: sb s3, 28(a2) -; RV32I-NEXT: sb a4, 29(a2) -; RV32I-NEXT: sb s7, 30(a2) -; RV32I-NEXT: sb s8, 31(a2) +; RV32I-NEXT: sb a4, 25(a2) +; RV32I-NEXT: sb a3, 26(a2) +; RV32I-NEXT: sb a5, 27(a2) +; RV32I-NEXT: srli a0, s2, 16 +; RV32I-NEXT: and a1, s2, a1 +; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: srli a3, s2, 24 +; RV32I-NEXT: sb s2, 28(a2) +; RV32I-NEXT: sb a1, 29(a2) +; RV32I-NEXT: sb a0, 30(a2) +; RV32I-NEXT: sb a3, 31(a2) ; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 68(sp) # 4-byte Folded Reload @@ -8334,34 +8264,49 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: lw s11, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 80 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB16_195: -; RV32I-NEXT: or t3, t1, a3 +; RV32I-NEXT: .LBB16_191: +; RV32I-NEXT: or t2, t1, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB16_177 -; RV32I-NEXT: .LBB16_196: +; RV32I-NEXT: beqz t5, .LBB16_177 +; RV32I-NEXT: .LBB16_192: ; RV32I-NEXT: mv a3, a6 -; RV32I-NEXT: beq a5, s8, .LBB16_178 +; RV32I-NEXT: beq a4, s8, .LBB16_178 ; RV32I-NEXT: j .LBB16_179 -; RV32I-NEXT: .LBB16_197: +; RV32I-NEXT: .LBB16_193: ; RV32I-NEXT: lw a3, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne a5, a6, .LBB16_181 -; RV32I-NEXT: .LBB16_198: +; RV32I-NEXT: bne a4, a6, .LBB16_181 +; RV32I-NEXT: .LBB16_194: ; RV32I-NEXT: lw a6, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: 
or t3, a6, a3 +; RV32I-NEXT: or t2, a6, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB16_182 -; RV32I-NEXT: .LBB16_199: +; RV32I-NEXT: beqz t5, .LBB16_182 +; RV32I-NEXT: .LBB16_195: ; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne a5, s4, .LBB16_183 -; RV32I-NEXT: .LBB16_200: +; RV32I-NEXT: bne a4, s3, .LBB16_183 +; RV32I-NEXT: .LBB16_196: ; RV32I-NEXT: lw a6, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t3, a6, a3 +; RV32I-NEXT: or t2, a6, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t5, .LBB16_184 +; RV32I-NEXT: .LBB16_197: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: bne a4, t6, .LBB16_185 +; RV32I-NEXT: .LBB16_198: +; RV32I-NEXT: lw a5, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t2, a5, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB16_184 +; RV32I-NEXT: beqz t5, .LBB16_186 +; RV32I-NEXT: .LBB16_199: +; RV32I-NEXT: mv a3, s7 +; RV32I-NEXT: bne a4, s0, .LBB16_187 +; RV32I-NEXT: .LBB16_200: +; RV32I-NEXT: or t2, s11, a3 +; RV32I-NEXT: li a3, 7 +; RV32I-NEXT: beq a4, a3, .LBB16_188 ; RV32I-NEXT: .LBB16_201: -; RV32I-NEXT: mv a3, s10 -; RV32I-NEXT: beq a5, s0, .LBB16_185 -; RV32I-NEXT: j .LBB16_186 +; RV32I-NEXT: mv t4, t2 +; RV32I-NEXT: bnez a1, .LBB16_189 +; RV32I-NEXT: j .LBB16_190 %src = load i256, ptr %src.ptr, align 1 %wordOff = load i256, ptr %wordOff.ptr, align 1 %bitOff = shl i256 %wordOff, 5 @@ -8387,278 +8332,280 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV64I-NEXT: sd s9, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s10, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s11, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: lbu a4, 1(a0) -; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: lbu a6, 3(a0) -; RV64I-NEXT: lbu a7, 4(a0) -; RV64I-NEXT: lbu t0, 5(a0) -; RV64I-NEXT: lbu t1, 6(a0) -; RV64I-NEXT: lbu t2, 7(a0) -; RV64I-NEXT: lbu t3, 8(a0) -; RV64I-NEXT: lbu t4, 9(a0) -; RV64I-NEXT: lbu t5, 10(a0) -; RV64I-NEXT: lbu t6, 11(a0) +; RV64I-NEXT: lbu a3, 1(a0) 
+; RV64I-NEXT: lbu a4, 3(a0) +; RV64I-NEXT: lbu a5, 7(a0) +; RV64I-NEXT: lbu a6, 2(a0) +; RV64I-NEXT: lbu a7, 0(a0) +; RV64I-NEXT: lbu t0, 4(a0) +; RV64I-NEXT: lbu t1, 5(a0) +; RV64I-NEXT: lbu t2, 6(a0) +; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a4, a6 +; RV64I-NEXT: or a3, a3, a7 +; RV64I-NEXT: slli a4, a4, 16 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or a5, a5, t2 +; RV64I-NEXT: or a6, t1, t0 +; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 12(a0) -; RV64I-NEXT: lbu s0, 13(a0) -; RV64I-NEXT: lbu s1, 14(a0) -; RV64I-NEXT: lbu s2, 15(a0) +; RV64I-NEXT: or a4, a5, a6 +; RV64I-NEXT: lbu a5, 8(a0) +; RV64I-NEXT: lbu a6, 9(a0) +; RV64I-NEXT: lbu a7, 10(a0) +; RV64I-NEXT: lbu t0, 11(a0) +; RV64I-NEXT: slli a4, a4, 32 +; RV64I-NEXT: lbu t1, 12(a0) +; RV64I-NEXT: lbu t2, 13(a0) +; RV64I-NEXT: lbu t3, 15(a0) +; RV64I-NEXT: lbu t4, 14(a0) +; RV64I-NEXT: or a4, a4, a3 ; RV64I-NEXT: slli a6, a6, 8 ; RV64I-NEXT: slli t0, t0, 8 -; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or s3, a6, a5 +; RV64I-NEXT: or a3, a6, a5 ; RV64I-NEXT: or a5, t0, a7 +; RV64I-NEXT: slli a6, a5, 16 +; RV64I-NEXT: slli t3, t3, 8 +; RV64I-NEXT: slli t2, t2, 8 +; RV64I-NEXT: or a5, t3, t4 ; RV64I-NEXT: or a7, t2, t1 -; RV64I-NEXT: or a6, t4, t3 -; RV64I-NEXT: lbu t0, 0(a1) +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: or a7, a5, a7 +; RV64I-NEXT: lbu t4, 19(a0) +; RV64I-NEXT: lbu t0, 21(a0) +; RV64I-NEXT: lbu a5, 22(a0) +; RV64I-NEXT: lbu t5, 23(a0) ; RV64I-NEXT: lbu t1, 1(a1) -; RV64I-NEXT: lbu t2, 2(a1) -; RV64I-NEXT: lbu t3, 3(a1) -; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: slli s0, s0, 8 -; RV64I-NEXT: slli s2, s2, 8 -; RV64I-NEXT: or t6, t6, t5 -; RV64I-NEXT: or s0, s0, a4 -; RV64I-NEXT: or s1, s2, s1 -; RV64I-NEXT: lbu a4, 4(a1) -; RV64I-NEXT: lbu t4, 5(a1) -; RV64I-NEXT: lbu t5, 6(a1) -; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: or a3, a6, a3 +; RV64I-NEXT: slli a7, 
a7, 32 +; RV64I-NEXT: or a3, a7, a3 +; RV64I-NEXT: lbu a6, 0(a1) +; RV64I-NEXT: lbu a7, 2(a1) +; RV64I-NEXT: lbu t2, 3(a1) +; RV64I-NEXT: slli t5, t5, 8 ; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: slli t3, t3, 8 -; RV64I-NEXT: slli t4, t4, 8 +; RV64I-NEXT: lbu t3, 4(a1) +; RV64I-NEXT: lbu t6, 5(a1) +; RV64I-NEXT: lbu s3, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: or a6, t1, a6 +; RV64I-NEXT: slli t2, t2, 8 +; RV64I-NEXT: or a7, t2, a7 +; RV64I-NEXT: lbu s2, 27(a0) +; RV64I-NEXT: lbu s1, 29(a0) +; RV64I-NEXT: lbu s0, 30(a0) +; RV64I-NEXT: lbu t1, 31(a0) ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or t0, t1, t0 -; RV64I-NEXT: or t1, t3, t2 -; RV64I-NEXT: or t2, t4, a4 -; RV64I-NEXT: or a1, a1, t5 -; RV64I-NEXT: lbu t5, 19(a0) -; RV64I-NEXT: lbu t4, 21(a0) -; RV64I-NEXT: lbu a4, 22(a0) -; RV64I-NEXT: lbu t3, 23(a0) -; RV64I-NEXT: slli s3, s3, 16 -; RV64I-NEXT: slli a7, a7, 16 -; RV64I-NEXT: slli t6, t6, 16 -; RV64I-NEXT: slli s1, s1, 16 -; RV64I-NEXT: or s4, s3, a3 -; RV64I-NEXT: or a5, a7, a5 -; RV64I-NEXT: or a6, t6, a6 -; RV64I-NEXT: or s0, s1, s0 -; RV64I-NEXT: lbu s1, 27(a0) -; RV64I-NEXT: lbu t6, 29(a0) -; RV64I-NEXT: lbu a3, 30(a0) -; RV64I-NEXT: lbu s2, 31(a0) -; RV64I-NEXT: slli t1, t1, 16 -; RV64I-NEXT: or s5, t1, t0 -; RV64I-NEXT: li a7, 128 +; RV64I-NEXT: slli t6, t6, 8 +; RV64I-NEXT: or a1, a1, s3 +; RV64I-NEXT: or t2, t6, t3 ; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: or a1, a1, t2 -; RV64I-NEXT: li t0, 64 -; RV64I-NEXT: slli s3, t3, 8 -; RV64I-NEXT: slli s2, s2, 8 -; RV64I-NEXT: slli a5, a5, 32 -; RV64I-NEXT: slli s0, s0, 32 +; RV64I-NEXT: or a6, a7, a6 ; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: or t1, a5, s4 -; RV64I-NEXT: or a5, s0, a6 -; RV64I-NEXT: or a6, a1, s5 +; RV64I-NEXT: slli s3, t1, 8 +; RV64I-NEXT: or a6, a1, a6 ; RV64I-NEXT: slli a6, a6, 6 -; RV64I-NEXT: sub t2, a6, t0 -; RV64I-NEXT: neg t3, a6 -; RV64I-NEXT: srl s0, t1, t3 -; RV64I-NEXT: bltu a6, t0, .LBB17_2 +; RV64I-NEXT: li t1, 64 +; RV64I-NEXT: 
neg t2, a6 +; RV64I-NEXT: sub t3, a6, t1 +; RV64I-NEXT: srl t6, a4, t2 +; RV64I-NEXT: bltu a6, t1, .LBB17_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: li a1, 0 -; RV64I-NEXT: sll s4, t1, t2 +; RV64I-NEXT: sll s4, a4, t3 ; RV64I-NEXT: j .LBB17_3 ; RV64I-NEXT: .LBB17_2: -; RV64I-NEXT: sll a1, t1, a6 -; RV64I-NEXT: sll s4, a5, a6 -; RV64I-NEXT: or s4, s0, s4 +; RV64I-NEXT: sll a7, a3, a6 +; RV64I-NEXT: sll a1, a4, a6 +; RV64I-NEXT: or s4, t6, a7 ; RV64I-NEXT: .LBB17_3: -; RV64I-NEXT: slli t5, t5, 8 -; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or s3, s3, a4 -; RV64I-NEXT: lbu ra, 17(a0) -; RV64I-NEXT: lbu s11, 18(a0) -; RV64I-NEXT: lbu s8, 20(a0) -; RV64I-NEXT: lbu s5, 25(a0) -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: lbu s7, 26(a0) -; RV64I-NEXT: lbu s6, 28(a0) -; RV64I-NEXT: slli s10, t6, 8 -; RV64I-NEXT: or s9, s2, a3 -; RV64I-NEXT: sub a4, a7, a6 -; RV64I-NEXT: mv a3, a5 +; RV64I-NEXT: slli s8, t4, 8 +; RV64I-NEXT: lbu s10, 17(a0) +; RV64I-NEXT: lbu s9, 18(a0) +; RV64I-NEXT: lbu t4, 20(a0) +; RV64I-NEXT: lbu s6, 25(a0) +; RV64I-NEXT: lbu s5, 26(a0) +; RV64I-NEXT: lbu s7, 28(a0) +; RV64I-NEXT: slli a7, t0, 8 +; RV64I-NEXT: or ra, t5, a5 +; RV64I-NEXT: slli s11, s2, 8 +; RV64I-NEXT: slli s2, s1, 8 +; RV64I-NEXT: or s3, s3, s0 +; RV64I-NEXT: li a5, 128 +; RV64I-NEXT: sub t0, a5, a6 +; RV64I-NEXT: mv a5, a3 ; RV64I-NEXT: beqz a6, .LBB17_5 ; RV64I-NEXT: # %bb.4: -; RV64I-NEXT: mv a3, s4 +; RV64I-NEXT: mv a5, s4 ; RV64I-NEXT: .LBB17_5: -; RV64I-NEXT: slli t6, ra, 8 -; RV64I-NEXT: or t5, t5, s11 -; RV64I-NEXT: or t4, t4, s8 -; RV64I-NEXT: slli s3, s3, 16 -; RV64I-NEXT: lbu s8, 16(a0) +; RV64I-NEXT: slli s0, s10, 8 +; RV64I-NEXT: or t5, s8, s9 +; RV64I-NEXT: lbu s1, 16(a0) ; RV64I-NEXT: lbu a0, 24(a0) -; RV64I-NEXT: slli s5, s5, 8 -; RV64I-NEXT: or s2, s1, s7 -; RV64I-NEXT: or s1, s10, s6 -; RV64I-NEXT: slli s4, s9, 16 -; RV64I-NEXT: bltu a4, t0, .LBB17_7 +; RV64I-NEXT: or t4, a7, t4 +; RV64I-NEXT: slli s8, ra, 16 +; RV64I-NEXT: slli s6, s6, 8 +; RV64I-NEXT: or s4, s11, s5 +; 
RV64I-NEXT: or s2, s2, s7 +; RV64I-NEXT: slli s3, s3, 16 +; RV64I-NEXT: bltu t0, t1, .LBB17_7 ; RV64I-NEXT: # %bb.6: -; RV64I-NEXT: sub s0, a4, t0 -; RV64I-NEXT: srl s0, a5, s0 +; RV64I-NEXT: sub a7, t0, t1 +; RV64I-NEXT: srl a7, a3, a7 ; RV64I-NEXT: j .LBB17_8 ; RV64I-NEXT: .LBB17_7: -; RV64I-NEXT: neg s6, a4 -; RV64I-NEXT: sll s6, a5, s6 -; RV64I-NEXT: or s0, s0, s6 +; RV64I-NEXT: neg a7, t0 +; RV64I-NEXT: sll a7, a3, a7 +; RV64I-NEXT: or a7, t6, a7 ; RV64I-NEXT: .LBB17_8: -; RV64I-NEXT: or t6, t6, s8 -; RV64I-NEXT: slli s6, t5, 16 -; RV64I-NEXT: or s3, s3, t4 -; RV64I-NEXT: or t5, s5, a0 -; RV64I-NEXT: slli s2, s2, 16 -; RV64I-NEXT: or s1, s4, s1 -; RV64I-NEXT: mv t4, t1 -; RV64I-NEXT: beqz a4, .LBB17_10 +; RV64I-NEXT: or s0, s0, s1 +; RV64I-NEXT: slli s1, t5, 16 +; RV64I-NEXT: or t6, s8, t4 +; RV64I-NEXT: or t5, s6, a0 +; RV64I-NEXT: slli s4, s4, 16 +; RV64I-NEXT: or s2, s3, s2 +; RV64I-NEXT: mv t4, a4 +; RV64I-NEXT: beqz t0, .LBB17_10 ; RV64I-NEXT: # %bb.9: -; RV64I-NEXT: mv t4, s0 +; RV64I-NEXT: mv t4, a7 ; RV64I-NEXT: .LBB17_10: -; RV64I-NEXT: or a0, s6, t6 -; RV64I-NEXT: slli s0, s3, 32 -; RV64I-NEXT: or t6, s2, t5 -; RV64I-NEXT: slli s1, s1, 32 -; RV64I-NEXT: bltu a4, t0, .LBB17_12 +; RV64I-NEXT: or a0, s1, s0 +; RV64I-NEXT: slli t6, t6, 32 +; RV64I-NEXT: or a7, s4, t5 +; RV64I-NEXT: slli s0, s2, 32 +; RV64I-NEXT: bltu t0, t1, .LBB17_12 ; RV64I-NEXT: # %bb.11: ; RV64I-NEXT: li t5, 0 ; RV64I-NEXT: j .LBB17_13 ; RV64I-NEXT: .LBB17_12: -; RV64I-NEXT: srl t5, a5, t3 +; RV64I-NEXT: srl t5, a3, t2 ; RV64I-NEXT: .LBB17_13: -; RV64I-NEXT: or a4, s0, a0 -; RV64I-NEXT: or a0, s1, t6 -; RV64I-NEXT: bltu a6, t0, .LBB17_15 +; RV64I-NEXT: or t0, t6, a0 +; RV64I-NEXT: or a0, s0, a7 +; RV64I-NEXT: bltu a6, t1, .LBB17_15 ; RV64I-NEXT: # %bb.14: -; RV64I-NEXT: li t6, 0 -; RV64I-NEXT: sll t2, a4, t2 +; RV64I-NEXT: li t2, 0 +; RV64I-NEXT: sll a7, t0, t3 ; RV64I-NEXT: j .LBB17_16 ; RV64I-NEXT: .LBB17_15: -; RV64I-NEXT: sll t6, a4, a6 -; RV64I-NEXT: srl t2, a4, t3 +; 
RV64I-NEXT: srl a7, t0, t2 ; RV64I-NEXT: sll t3, a0, a6 -; RV64I-NEXT: or t2, t2, t3 +; RV64I-NEXT: sll t2, t0, a6 +; RV64I-NEXT: or a7, a7, t3 ; RV64I-NEXT: .LBB17_16: -; RV64I-NEXT: sub s0, a6, a7 -; RV64I-NEXT: mv t3, a0 +; RV64I-NEXT: li t3, 128 +; RV64I-NEXT: sub s0, a6, t3 +; RV64I-NEXT: mv t6, a0 ; RV64I-NEXT: beqz a6, .LBB17_18 ; RV64I-NEXT: # %bb.17: -; RV64I-NEXT: mv t3, t2 +; RV64I-NEXT: mv t6, a7 ; RV64I-NEXT: .LBB17_18: -; RV64I-NEXT: bltu s0, t0, .LBB17_20 +; RV64I-NEXT: bltu s0, t1, .LBB17_20 ; RV64I-NEXT: # %bb.19: -; RV64I-NEXT: li t2, 0 -; RV64I-NEXT: sub t0, s0, t0 -; RV64I-NEXT: sll t0, t1, t0 +; RV64I-NEXT: li t3, 0 +; RV64I-NEXT: sub a7, s0, t1 +; RV64I-NEXT: sll a4, a4, a7 ; RV64I-NEXT: bnez s0, .LBB17_21 ; RV64I-NEXT: j .LBB17_22 ; RV64I-NEXT: .LBB17_20: -; RV64I-NEXT: sll t2, t1, s0 -; RV64I-NEXT: neg t0, s0 -; RV64I-NEXT: srl t0, t1, t0 -; RV64I-NEXT: sll t1, a5, s0 -; RV64I-NEXT: or t0, t0, t1 +; RV64I-NEXT: neg a7, s0 +; RV64I-NEXT: srl a7, a4, a7 +; RV64I-NEXT: sll t1, a3, s0 +; RV64I-NEXT: sll t3, a4, s0 +; RV64I-NEXT: or a4, a7, t1 ; RV64I-NEXT: beqz s0, .LBB17_22 ; RV64I-NEXT: .LBB17_21: -; RV64I-NEXT: mv a5, t0 +; RV64I-NEXT: mv a3, a4 ; RV64I-NEXT: .LBB17_22: -; RV64I-NEXT: bltu a6, a7, .LBB17_24 +; RV64I-NEXT: li a4, 128 +; RV64I-NEXT: bltu a6, a4, .LBB17_24 ; RV64I-NEXT: # %bb.23: ; RV64I-NEXT: li a1, 0 -; RV64I-NEXT: li a3, 0 +; RV64I-NEXT: li a5, 0 ; RV64I-NEXT: bnez a6, .LBB17_25 ; RV64I-NEXT: j .LBB17_26 ; RV64I-NEXT: .LBB17_24: -; RV64I-NEXT: or t2, t4, t6 -; RV64I-NEXT: or a5, t5, t3 +; RV64I-NEXT: or t3, t4, t2 +; RV64I-NEXT: or a3, t5, t6 ; RV64I-NEXT: beqz a6, .LBB17_26 ; RV64I-NEXT: .LBB17_25: -; RV64I-NEXT: mv a4, t2 -; RV64I-NEXT: mv a0, a5 +; RV64I-NEXT: mv t0, t3 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: .LBB17_26: -; RV64I-NEXT: srli a5, a1, 32 +; RV64I-NEXT: srli a4, a1, 32 +; RV64I-NEXT: lui a3, 16 ; RV64I-NEXT: srliw a6, a1, 16 -; RV64I-NEXT: lui t2, 16 -; RV64I-NEXT: srliw t1, a1, 24 -; RV64I-NEXT: srli t0, a1, 48 
-; RV64I-NEXT: srli t5, a1, 56 -; RV64I-NEXT: srli a7, a3, 32 -; RV64I-NEXT: srliw t4, a3, 16 -; RV64I-NEXT: srliw s0, a3, 24 -; RV64I-NEXT: srli t6, a3, 48 -; RV64I-NEXT: srli s3, a3, 56 -; RV64I-NEXT: srli t3, a4, 32 -; RV64I-NEXT: srliw s2, a4, 16 -; RV64I-NEXT: srliw s6, a4, 24 -; RV64I-NEXT: srli s4, a4, 48 -; RV64I-NEXT: srli s7, a4, 56 -; RV64I-NEXT: srli s1, a0, 32 -; RV64I-NEXT: srliw s5, a0, 16 -; RV64I-NEXT: srliw s8, a0, 24 -; RV64I-NEXT: srli s9, a0, 48 -; RV64I-NEXT: srli s10, a0, 56 -; RV64I-NEXT: addi t2, t2, -1 -; RV64I-NEXT: and s11, a1, t2 -; RV64I-NEXT: srli s11, s11, 8 +; RV64I-NEXT: addi a3, a3, -1 +; RV64I-NEXT: srliw a7, a1, 24 +; RV64I-NEXT: and t1, a1, a3 +; RV64I-NEXT: srli t1, t1, 8 ; RV64I-NEXT: sb a1, 0(a2) -; RV64I-NEXT: sb s11, 1(a2) +; RV64I-NEXT: sb t1, 1(a2) ; RV64I-NEXT: sb a6, 2(a2) -; RV64I-NEXT: sb t1, 3(a2) -; RV64I-NEXT: and a1, a5, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a5, 4(a2) -; RV64I-NEXT: sb a1, 5(a2) -; RV64I-NEXT: sb t0, 6(a2) -; RV64I-NEXT: sb t5, 7(a2) -; RV64I-NEXT: and a1, a3, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a3, 8(a2) -; RV64I-NEXT: sb a1, 9(a2) -; RV64I-NEXT: sb t4, 10(a2) -; RV64I-NEXT: sb s0, 11(a2) -; RV64I-NEXT: and a1, a7, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a7, 12(a2) -; RV64I-NEXT: sb a1, 13(a2) -; RV64I-NEXT: sb t6, 14(a2) -; RV64I-NEXT: sb s3, 15(a2) -; RV64I-NEXT: and a1, a4, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a4, 16(a2) -; RV64I-NEXT: sb a1, 17(a2) -; RV64I-NEXT: sb s2, 18(a2) -; RV64I-NEXT: sb s6, 19(a2) -; RV64I-NEXT: and a1, t3, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb t3, 20(a2) -; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: sb s4, 22(a2) -; RV64I-NEXT: sb s7, 23(a2) -; RV64I-NEXT: and a1, a0, t2 -; RV64I-NEXT: and a3, s1, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: sb a7, 3(a2) +; RV64I-NEXT: and a6, a4, a3 +; RV64I-NEXT: srli a7, a1, 48 +; RV64I-NEXT: srli a6, a6, 8 +; RV64I-NEXT: srli a1, a1, 
56 +; RV64I-NEXT: sb a4, 4(a2) +; RV64I-NEXT: sb a6, 5(a2) +; RV64I-NEXT: sb a7, 6(a2) +; RV64I-NEXT: sb a1, 7(a2) +; RV64I-NEXT: srli a1, a5, 32 +; RV64I-NEXT: srliw a4, a5, 16 +; RV64I-NEXT: and a6, a5, a3 +; RV64I-NEXT: srli a6, a6, 8 +; RV64I-NEXT: srliw a7, a5, 24 +; RV64I-NEXT: sb a5, 8(a2) +; RV64I-NEXT: sb a6, 9(a2) +; RV64I-NEXT: sb a4, 10(a2) +; RV64I-NEXT: sb a7, 11(a2) +; RV64I-NEXT: srli a4, a5, 48 +; RV64I-NEXT: and a6, a1, a3 +; RV64I-NEXT: srli a6, a6, 8 +; RV64I-NEXT: srli a5, a5, 56 +; RV64I-NEXT: sb a1, 12(a2) +; RV64I-NEXT: sb a6, 13(a2) +; RV64I-NEXT: sb a4, 14(a2) +; RV64I-NEXT: sb a5, 15(a2) +; RV64I-NEXT: srli a1, t0, 32 +; RV64I-NEXT: and a4, t0, a3 +; RV64I-NEXT: srliw a5, t0, 16 +; RV64I-NEXT: srli a4, a4, 8 +; RV64I-NEXT: srliw a6, t0, 24 +; RV64I-NEXT: sb t0, 16(a2) +; RV64I-NEXT: sb a4, 17(a2) +; RV64I-NEXT: sb a5, 18(a2) +; RV64I-NEXT: sb a6, 19(a2) +; RV64I-NEXT: and a4, a1, a3 +; RV64I-NEXT: srli a5, t0, 48 +; RV64I-NEXT: srli a4, a4, 8 +; RV64I-NEXT: srli a6, t0, 56 +; RV64I-NEXT: sb a1, 20(a2) +; RV64I-NEXT: sb a4, 21(a2) +; RV64I-NEXT: sb a5, 22(a2) +; RV64I-NEXT: sb a6, 23(a2) +; RV64I-NEXT: srli a1, a0, 32 +; RV64I-NEXT: srliw a4, a0, 16 +; RV64I-NEXT: and a5, a0, a3 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srliw a6, a0, 24 ; RV64I-NEXT: sb a0, 24(a2) -; RV64I-NEXT: sb a1, 25(a2) -; RV64I-NEXT: sb s5, 26(a2) -; RV64I-NEXT: sb s8, 27(a2) -; RV64I-NEXT: sb s1, 28(a2) +; RV64I-NEXT: sb a5, 25(a2) +; RV64I-NEXT: sb a4, 26(a2) +; RV64I-NEXT: sb a6, 27(a2) +; RV64I-NEXT: srli a4, a0, 48 +; RV64I-NEXT: and a3, a1, a3 +; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: sb a1, 28(a2) ; RV64I-NEXT: sb a3, 29(a2) -; RV64I-NEXT: sb s9, 30(a2) -; RV64I-NEXT: sb s10, 31(a2) +; RV64I-NEXT: sb a4, 30(a2) +; RV64I-NEXT: sb a0, 31(a2) ; RV64I-NEXT: ld ra, 104(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 96(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 88(sp) # 8-byte Folded Reload @@ -8693,736 +8640,736 @@ define 
void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV32I-NEXT: sw s11, 28(sp) # 4-byte Folded Spill ; RV32I-NEXT: lbu a3, 1(a0) ; RV32I-NEXT: lbu a4, 0(a0) -; RV32I-NEXT: lbu a5, 2(a0) -; RV32I-NEXT: lbu a6, 3(a0) +; RV32I-NEXT: lbu a5, 3(a0) +; RV32I-NEXT: lbu a6, 2(a0) +; RV32I-NEXT: lbu a7, 3(a1) +; RV32I-NEXT: lbu t0, 1(a1) +; RV32I-NEXT: lbu t1, 2(a1) +; RV32I-NEXT: lbu a1, 0(a1) ; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: or a3, a3, a4 -; RV32I-NEXT: lbu a4, 0(a1) -; RV32I-NEXT: lbu a7, 1(a1) -; RV32I-NEXT: lbu t0, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a4, a5, a6 ; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a4, a7, a4 -; RV32I-NEXT: or a1, a1, t0 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or a5, a7, t1 +; RV32I-NEXT: or a1, t0, a1 ; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: or a6, a5, a3 -; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: slli a4, a4, 16 +; RV32I-NEXT: or a1, a5, a1 +; RV32I-NEXT: or a5, a4, a3 ; RV32I-NEXT: slli a1, a1, 6 -; RV32I-NEXT: srli a5, a1, 5 -; RV32I-NEXT: sll t5, a6, a1 -; RV32I-NEXT: li s5, 1 -; RV32I-NEXT: mv a4, t5 -; RV32I-NEXT: beqz a5, .LBB17_2 +; RV32I-NEXT: srli a4, a1, 5 +; RV32I-NEXT: sll t4, a5, a1 +; RV32I-NEXT: li s4, 1 +; RV32I-NEXT: mv a6, t4 +; RV32I-NEXT: beqz a4, .LBB17_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li a6, 0 ; RV32I-NEXT: .LBB17_2: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: li s0, 2 -; RV32I-NEXT: beq a5, s5, .LBB17_4 +; RV32I-NEXT: li t6, 2 +; RV32I-NEXT: beq a4, s4, .LBB17_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: mv a3, a4 +; RV32I-NEXT: mv a3, a6 ; RV32I-NEXT: .LBB17_4: ; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: beq a5, s0, .LBB17_6 +; RV32I-NEXT: beq a4, t6, .LBB17_6 ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: mv a7, a3 ; RV32I-NEXT: .LBB17_6: -; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li a6, 0 ; RV32I-NEXT: li s8, 
3 ; RV32I-NEXT: li s10, 4 -; RV32I-NEXT: beq a5, s8, .LBB17_8 +; RV32I-NEXT: beq a4, s8, .LBB17_8 ; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: mv a4, a7 +; RV32I-NEXT: mv a6, a7 ; RV32I-NEXT: .LBB17_8: ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: li s1, 5 -; RV32I-NEXT: beq a5, s10, .LBB17_10 +; RV32I-NEXT: li s0, 5 +; RV32I-NEXT: beq a4, s10, .LBB17_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv t0, a4 +; RV32I-NEXT: mv t0, a6 ; RV32I-NEXT: .LBB17_10: ; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: lbu t1, 7(a0) +; RV32I-NEXT: lbu t2, 7(a0) ; RV32I-NEXT: li s6, 6 -; RV32I-NEXT: beq a5, s1, .LBB17_12 +; RV32I-NEXT: beq a4, s0, .LBB17_12 ; RV32I-NEXT: # %bb.11: ; RV32I-NEXT: mv a7, t0 ; RV32I-NEXT: .LBB17_12: -; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li a6, 0 ; RV32I-NEXT: lbu t0, 5(a0) -; RV32I-NEXT: lbu a3, 6(a0) -; RV32I-NEXT: slli t2, t1, 8 -; RV32I-NEXT: beq a5, s6, .LBB17_14 +; RV32I-NEXT: lbu t1, 6(a0) +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: beq a4, s6, .LBB17_14 ; RV32I-NEXT: # %bb.13: -; RV32I-NEXT: mv a4, a7 +; RV32I-NEXT: mv a6, a7 ; RV32I-NEXT: .LBB17_14: ; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: lbu t1, 4(a0) -; RV32I-NEXT: or t2, t2, a3 +; RV32I-NEXT: lbu t3, 4(a0) ; RV32I-NEXT: li ra, 7 +; RV32I-NEXT: or a3, t2, t1 ; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: beq a5, ra, .LBB17_16 +; RV32I-NEXT: beq a4, ra, .LBB17_16 ; RV32I-NEXT: # %bb.15: -; RV32I-NEXT: mv a7, a4 +; RV32I-NEXT: mv a7, a6 ; RV32I-NEXT: .LBB17_16: -; RV32I-NEXT: or a3, t0, t1 -; RV32I-NEXT: slli t2, t2, 16 -; RV32I-NEXT: andi t6, a1, 31 -; RV32I-NEXT: mv a4, a6 +; RV32I-NEXT: or t1, t0, t3 +; RV32I-NEXT: slli a3, a3, 16 +; RV32I-NEXT: andi t5, a1, 31 +; RV32I-NEXT: mv a6, a5 ; RV32I-NEXT: beqz a1, .LBB17_18 ; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: mv a4, a7 +; RV32I-NEXT: mv a6, a7 ; RV32I-NEXT: .LBB17_18: ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: or a7, t2, a3 -; RV32I-NEXT: neg s3, t6 -; RV32I-NEXT: srl s4, a6, s3 -; RV32I-NEXT: beqz t6, .LBB17_20 +; RV32I-NEXT: neg s2, t5 +; RV32I-NEXT: or a7, a3, t1 +; RV32I-NEXT: 
srl s3, a5, s2 +; RV32I-NEXT: beqz t5, .LBB17_20 ; RV32I-NEXT: # %bb.19: -; RV32I-NEXT: mv t0, s4 +; RV32I-NEXT: mv t0, s3 ; RV32I-NEXT: .LBB17_20: ; RV32I-NEXT: sll s9, a7, a1 -; RV32I-NEXT: beqz a5, .LBB17_22 +; RV32I-NEXT: beqz a4, .LBB17_22 ; RV32I-NEXT: # %bb.21: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: mv a6, t5 -; RV32I-NEXT: bne a5, s5, .LBB17_23 +; RV32I-NEXT: mv a5, t4 +; RV32I-NEXT: bne a4, s4, .LBB17_23 ; RV32I-NEXT: j .LBB17_24 ; RV32I-NEXT: .LBB17_22: ; RV32I-NEXT: or a3, s9, t0 -; RV32I-NEXT: mv a6, t5 -; RV32I-NEXT: beq a5, s5, .LBB17_24 +; RV32I-NEXT: mv a5, t4 +; RV32I-NEXT: beq a4, s4, .LBB17_24 ; RV32I-NEXT: .LBB17_23: -; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: mv a5, a3 ; RV32I-NEXT: .LBB17_24: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bne a5, s0, .LBB17_40 +; RV32I-NEXT: bne a4, t6, .LBB17_40 ; RV32I-NEXT: # %bb.25: -; RV32I-NEXT: li a6, 0 -; RV32I-NEXT: bne a5, s8, .LBB17_41 +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: bne a4, s8, .LBB17_41 ; RV32I-NEXT: .LBB17_26: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beq a5, s10, .LBB17_28 +; RV32I-NEXT: beq a4, s10, .LBB17_28 ; RV32I-NEXT: .LBB17_27: -; RV32I-NEXT: mv a3, a6 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: .LBB17_28: ; RV32I-NEXT: lbu t2, 11(a0) ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: beq a5, s1, .LBB17_30 +; RV32I-NEXT: beq a4, s0, .LBB17_30 ; RV32I-NEXT: # %bb.29: ; RV32I-NEXT: mv t0, a3 ; RV32I-NEXT: .LBB17_30: -; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: lbu t1, 9(a0) ; RV32I-NEXT: lbu a3, 10(a0) ; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: beq a5, s6, .LBB17_32 +; RV32I-NEXT: beq a4, s6, .LBB17_32 ; RV32I-NEXT: # %bb.31: -; RV32I-NEXT: mv a6, t0 +; RV32I-NEXT: mv a5, t0 ; RV32I-NEXT: .LBB17_32: ; RV32I-NEXT: li t0, 0 ; RV32I-NEXT: lbu t3, 8(a0) ; RV32I-NEXT: or t2, t2, a3 ; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: beq a5, ra, .LBB17_34 +; RV32I-NEXT: beq a4, ra, .LBB17_34 ; RV32I-NEXT: # %bb.33: -; RV32I-NEXT: mv t0, a6 +; RV32I-NEXT: mv t0, a5 ; RV32I-NEXT: .LBB17_34: ; RV32I-NEXT: or a3, t1, 
t3 -; RV32I-NEXT: slli a6, t2, 16 -; RV32I-NEXT: mv t2, a7 +; RV32I-NEXT: slli a5, t2, 16 +; RV32I-NEXT: mv s1, a7 ; RV32I-NEXT: beqz a1, .LBB17_36 ; RV32I-NEXT: # %bb.35: -; RV32I-NEXT: mv t2, t0 +; RV32I-NEXT: mv s1, t0 ; RV32I-NEXT: .LBB17_36: ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: or a6, a6, a3 -; RV32I-NEXT: srl a3, a7, s3 +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: srl a3, a7, s2 ; RV32I-NEXT: sw a3, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: beqz t6, .LBB17_38 +; RV32I-NEXT: beqz t5, .LBB17_38 ; RV32I-NEXT: # %bb.37: ; RV32I-NEXT: lw t0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB17_38: -; RV32I-NEXT: sll s7, a6, a1 -; RV32I-NEXT: beqz a5, .LBB17_42 +; RV32I-NEXT: sll s7, a5, a1 +; RV32I-NEXT: beqz a4, .LBB17_42 ; RV32I-NEXT: # %bb.39: ; RV32I-NEXT: li a7, 0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB17_43 +; RV32I-NEXT: bnez t5, .LBB17_43 ; RV32I-NEXT: j .LBB17_44 ; RV32I-NEXT: .LBB17_40: -; RV32I-NEXT: mv a3, a6 -; RV32I-NEXT: li a6, 0 -; RV32I-NEXT: beq a5, s8, .LBB17_26 +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: beq a4, s8, .LBB17_26 ; RV32I-NEXT: .LBB17_41: -; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: mv a5, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bne a5, s10, .LBB17_27 +; RV32I-NEXT: bne a4, s10, .LBB17_27 ; RV32I-NEXT: j .LBB17_28 ; RV32I-NEXT: .LBB17_42: ; RV32I-NEXT: or a7, s7, t0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB17_44 +; RV32I-NEXT: beqz t5, .LBB17_44 ; RV32I-NEXT: .LBB17_43: -; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: mv a3, s3 ; RV32I-NEXT: .LBB17_44: -; RV32I-NEXT: beq a5, s5, .LBB17_61 +; RV32I-NEXT: beq a4, s4, .LBB17_61 ; RV32I-NEXT: # %bb.45: -; RV32I-NEXT: mv a3, t5 -; RV32I-NEXT: bne a5, s0, .LBB17_62 +; RV32I-NEXT: mv a3, t4 +; RV32I-NEXT: bne a4, t6, .LBB17_62 ; RV32I-NEXT: .LBB17_46: ; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: bne a5, s8, .LBB17_63 +; RV32I-NEXT: bne a4, s8, .LBB17_63 ; RV32I-NEXT: .LBB17_47: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beq a5, s10, .LBB17_49 +; RV32I-NEXT: beq a4, s10, 
.LBB17_49 ; RV32I-NEXT: .LBB17_48: ; RV32I-NEXT: mv a3, a7 ; RV32I-NEXT: .LBB17_49: -; RV32I-NEXT: lbu t3, 15(a0) +; RV32I-NEXT: lbu t2, 15(a0) ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: beq a5, s1, .LBB17_51 +; RV32I-NEXT: beq a4, s0, .LBB17_51 ; RV32I-NEXT: # %bb.50: ; RV32I-NEXT: mv t0, a3 ; RV32I-NEXT: .LBB17_51: ; RV32I-NEXT: li a7, 0 ; RV32I-NEXT: lbu t1, 13(a0) ; RV32I-NEXT: lbu a3, 14(a0) -; RV32I-NEXT: slli t3, t3, 8 -; RV32I-NEXT: beq a5, s6, .LBB17_53 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: beq a4, s6, .LBB17_53 ; RV32I-NEXT: # %bb.52: ; RV32I-NEXT: mv a7, t0 ; RV32I-NEXT: .LBB17_53: ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: lbu t4, 12(a0) -; RV32I-NEXT: or t3, t3, a3 +; RV32I-NEXT: lbu t3, 12(a0) +; RV32I-NEXT: or t2, t2, a3 ; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: beq a5, ra, .LBB17_55 +; RV32I-NEXT: beq a4, ra, .LBB17_55 ; RV32I-NEXT: # %bb.54: ; RV32I-NEXT: mv t0, a7 ; RV32I-NEXT: .LBB17_55: -; RV32I-NEXT: or a3, t1, t4 -; RV32I-NEXT: slli t3, t3, 16 -; RV32I-NEXT: mv s2, a6 +; RV32I-NEXT: or a3, t1, t3 +; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: mv s5, a5 ; RV32I-NEXT: beqz a1, .LBB17_57 ; RV32I-NEXT: # %bb.56: -; RV32I-NEXT: mv s2, t0 +; RV32I-NEXT: mv s5, t0 ; RV32I-NEXT: .LBB17_57: ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: or a7, t3, a3 -; RV32I-NEXT: srl a3, a6, s3 +; RV32I-NEXT: or a7, t2, a3 +; RV32I-NEXT: srl a3, a5, s2 ; RV32I-NEXT: sw a3, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: beqz t6, .LBB17_59 +; RV32I-NEXT: beqz t5, .LBB17_59 ; RV32I-NEXT: # %bb.58: ; RV32I-NEXT: lw t0, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB17_59: ; RV32I-NEXT: sll a3, a7, a1 ; RV32I-NEXT: sw a3, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: beqz a5, .LBB17_64 +; RV32I-NEXT: beqz a4, .LBB17_64 ; RV32I-NEXT: # %bb.60: -; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB17_65 +; RV32I-NEXT: bnez t5, .LBB17_65 ; RV32I-NEXT: j .LBB17_66 ; RV32I-NEXT: .LBB17_61: ; RV32I-NEXT: or a7, s9, a3 -; RV32I-NEXT: mv a3, t5 -; 
RV32I-NEXT: beq a5, s0, .LBB17_46 +; RV32I-NEXT: mv a3, t4 +; RV32I-NEXT: beq a4, t6, .LBB17_46 ; RV32I-NEXT: .LBB17_62: ; RV32I-NEXT: mv a3, a7 ; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: beq a5, s8, .LBB17_47 +; RV32I-NEXT: beq a4, s8, .LBB17_47 ; RV32I-NEXT: .LBB17_63: ; RV32I-NEXT: mv a7, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bne a5, s10, .LBB17_48 +; RV32I-NEXT: bne a4, s10, .LBB17_48 ; RV32I-NEXT: j .LBB17_49 ; RV32I-NEXT: .LBB17_64: -; RV32I-NEXT: or a6, a3, t0 +; RV32I-NEXT: or a5, a3, t0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB17_66 +; RV32I-NEXT: beqz t5, .LBB17_66 ; RV32I-NEXT: .LBB17_65: ; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB17_66: -; RV32I-NEXT: beq a5, s5, .LBB17_84 +; RV32I-NEXT: beq a4, s4, .LBB17_84 ; RV32I-NEXT: # %bb.67: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB17_85 +; RV32I-NEXT: bnez t5, .LBB17_85 ; RV32I-NEXT: .LBB17_68: -; RV32I-NEXT: beq a5, s0, .LBB17_86 +; RV32I-NEXT: beq a4, t6, .LBB17_86 ; RV32I-NEXT: .LBB17_69: -; RV32I-NEXT: mv t0, t5 -; RV32I-NEXT: bne a5, s8, .LBB17_87 +; RV32I-NEXT: mv t0, t4 +; RV32I-NEXT: bne a4, s8, .LBB17_87 ; RV32I-NEXT: .LBB17_70: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beq a5, s10, .LBB17_72 +; RV32I-NEXT: beq a4, s10, .LBB17_72 ; RV32I-NEXT: .LBB17_71: ; RV32I-NEXT: mv a3, t0 ; RV32I-NEXT: .LBB17_72: -; RV32I-NEXT: lbu t3, 19(a0) +; RV32I-NEXT: lbu t2, 19(a0) ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: beq a5, s1, .LBB17_74 +; RV32I-NEXT: beq a4, s0, .LBB17_74 ; RV32I-NEXT: # %bb.73: ; RV32I-NEXT: mv t0, a3 ; RV32I-NEXT: .LBB17_74: -; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: lbu t1, 17(a0) ; RV32I-NEXT: lbu a3, 18(a0) -; RV32I-NEXT: slli t3, t3, 8 -; RV32I-NEXT: beq a5, s6, .LBB17_76 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: beq a4, s6, .LBB17_76 ; RV32I-NEXT: # %bb.75: -; RV32I-NEXT: mv a6, t0 +; RV32I-NEXT: mv a5, t0 ; RV32I-NEXT: .LBB17_76: ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: lbu t4, 16(a0) -; RV32I-NEXT: or t3, t3, a3 +; RV32I-NEXT: lbu t3, 
16(a0) +; RV32I-NEXT: or t2, t2, a3 ; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: beq a5, ra, .LBB17_78 +; RV32I-NEXT: beq a4, ra, .LBB17_78 ; RV32I-NEXT: # %bb.77: -; RV32I-NEXT: mv t0, a6 +; RV32I-NEXT: mv t0, a5 ; RV32I-NEXT: .LBB17_78: -; RV32I-NEXT: or a3, t1, t4 -; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: or a3, t1, t3 +; RV32I-NEXT: slli a5, t2, 16 ; RV32I-NEXT: mv s6, a7 ; RV32I-NEXT: beqz a1, .LBB17_80 ; RV32I-NEXT: # %bb.79: ; RV32I-NEXT: mv s6, t0 ; RV32I-NEXT: .LBB17_80: ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: or a6, t3, a3 -; RV32I-NEXT: srl s10, a7, s3 -; RV32I-NEXT: beqz t6, .LBB17_82 +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: srl s10, a7, s2 +; RV32I-NEXT: beqz t5, .LBB17_82 ; RV32I-NEXT: # %bb.81: ; RV32I-NEXT: mv t0, s10 ; RV32I-NEXT: .LBB17_82: -; RV32I-NEXT: sll s11, a6, a1 -; RV32I-NEXT: beqz a5, .LBB17_88 +; RV32I-NEXT: sll s11, a5, a1 +; RV32I-NEXT: beqz a4, .LBB17_88 ; RV32I-NEXT: # %bb.83: ; RV32I-NEXT: li a7, 0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB17_89 +; RV32I-NEXT: bnez t5, .LBB17_89 ; RV32I-NEXT: j .LBB17_90 ; RV32I-NEXT: .LBB17_84: -; RV32I-NEXT: or a6, s7, a3 +; RV32I-NEXT: or a5, s7, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB17_68 +; RV32I-NEXT: beqz t5, .LBB17_68 ; RV32I-NEXT: .LBB17_85: -; RV32I-NEXT: mv a3, s4 -; RV32I-NEXT: bne a5, s0, .LBB17_69 +; RV32I-NEXT: mv a3, s3 +; RV32I-NEXT: bne a4, t6, .LBB17_69 ; RV32I-NEXT: .LBB17_86: -; RV32I-NEXT: or a6, s9, a3 -; RV32I-NEXT: mv t0, t5 -; RV32I-NEXT: beq a5, s8, .LBB17_70 +; RV32I-NEXT: or a5, s9, a3 +; RV32I-NEXT: mv t0, t4 +; RV32I-NEXT: beq a4, s8, .LBB17_70 ; RV32I-NEXT: .LBB17_87: -; RV32I-NEXT: mv t0, a6 +; RV32I-NEXT: mv t0, a5 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bne a5, s10, .LBB17_71 +; RV32I-NEXT: bne a4, s10, .LBB17_71 ; RV32I-NEXT: j .LBB17_72 ; RV32I-NEXT: .LBB17_88: ; RV32I-NEXT: or a7, s11, t0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB17_90 +; RV32I-NEXT: beqz t5, .LBB17_90 ; RV32I-NEXT: .LBB17_89: ; RV32I-NEXT: lw a3, 20(sp) # 
4-byte Folded Reload ; RV32I-NEXT: .LBB17_90: -; RV32I-NEXT: beq a5, s5, .LBB17_110 +; RV32I-NEXT: beq a4, s4, .LBB17_110 ; RV32I-NEXT: # %bb.91: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB17_111 +; RV32I-NEXT: bnez t5, .LBB17_111 ; RV32I-NEXT: .LBB17_92: -; RV32I-NEXT: beq a5, s0, .LBB17_112 +; RV32I-NEXT: beq a4, t6, .LBB17_112 ; RV32I-NEXT: .LBB17_93: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB17_113 +; RV32I-NEXT: bnez t5, .LBB17_113 ; RV32I-NEXT: .LBB17_94: -; RV32I-NEXT: bne a5, s8, .LBB17_96 +; RV32I-NEXT: bne a4, s8, .LBB17_96 ; RV32I-NEXT: .LBB17_95: ; RV32I-NEXT: or a7, s9, a3 ; RV32I-NEXT: .LBB17_96: -; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: mv a3, t4 ; RV32I-NEXT: li t0, 4 -; RV32I-NEXT: beq a5, t0, .LBB17_98 +; RV32I-NEXT: beq a4, t0, .LBB17_98 ; RV32I-NEXT: # %bb.97: ; RV32I-NEXT: mv a3, a7 ; RV32I-NEXT: .LBB17_98: -; RV32I-NEXT: lbu t3, 23(a0) +; RV32I-NEXT: lbu t2, 23(a0) ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: beq a5, s1, .LBB17_100 +; RV32I-NEXT: beq a4, s0, .LBB17_100 ; RV32I-NEXT: # %bb.99: ; RV32I-NEXT: mv t0, a3 ; RV32I-NEXT: .LBB17_100: ; RV32I-NEXT: li a7, 0 ; RV32I-NEXT: lbu t1, 21(a0) ; RV32I-NEXT: lbu a3, 22(a0) -; RV32I-NEXT: slli t3, t3, 8 -; RV32I-NEXT: li t4, 6 -; RV32I-NEXT: beq a5, t4, .LBB17_102 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: li t3, 6 +; RV32I-NEXT: beq a4, t3, .LBB17_102 ; RV32I-NEXT: # %bb.101: ; RV32I-NEXT: mv a7, t0 ; RV32I-NEXT: .LBB17_102: ; RV32I-NEXT: sw s9, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: li t0, 0 -; RV32I-NEXT: lbu t4, 20(a0) -; RV32I-NEXT: or t3, t3, a3 +; RV32I-NEXT: lbu t3, 20(a0) +; RV32I-NEXT: or t2, t2, a3 ; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: beq a5, ra, .LBB17_104 +; RV32I-NEXT: beq a4, ra, .LBB17_104 ; RV32I-NEXT: # %bb.103: ; RV32I-NEXT: mv t0, a7 ; RV32I-NEXT: .LBB17_104: -; RV32I-NEXT: or a3, t1, t4 -; RV32I-NEXT: slli t3, t3, 16 -; RV32I-NEXT: mv s9, a6 +; RV32I-NEXT: 
or a3, t1, t3 +; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: mv s9, a5 ; RV32I-NEXT: beqz a1, .LBB17_106 ; RV32I-NEXT: # %bb.105: ; RV32I-NEXT: mv s9, t0 ; RV32I-NEXT: .LBB17_106: ; RV32I-NEXT: li t1, 0 -; RV32I-NEXT: or t0, t3, a3 -; RV32I-NEXT: srl a6, a6, s3 -; RV32I-NEXT: beqz t6, .LBB17_108 +; RV32I-NEXT: or t0, t2, a3 +; RV32I-NEXT: srl a5, a5, s2 +; RV32I-NEXT: beqz t5, .LBB17_108 ; RV32I-NEXT: # %bb.107: -; RV32I-NEXT: mv t1, a6 +; RV32I-NEXT: mv t1, a5 ; RV32I-NEXT: .LBB17_108: ; RV32I-NEXT: sll a7, t0, a1 -; RV32I-NEXT: beqz a5, .LBB17_114 +; RV32I-NEXT: beqz a4, .LBB17_114 ; RV32I-NEXT: # %bb.109: ; RV32I-NEXT: li t1, 0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB17_115 +; RV32I-NEXT: bnez t5, .LBB17_115 ; RV32I-NEXT: j .LBB17_116 ; RV32I-NEXT: .LBB17_110: ; RV32I-NEXT: lw a7, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: or a7, a7, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB17_92 +; RV32I-NEXT: beqz t5, .LBB17_92 ; RV32I-NEXT: .LBB17_111: ; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne a5, s0, .LBB17_93 +; RV32I-NEXT: bne a4, t6, .LBB17_93 ; RV32I-NEXT: .LBB17_112: ; RV32I-NEXT: or a7, s7, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB17_94 +; RV32I-NEXT: beqz t5, .LBB17_94 ; RV32I-NEXT: .LBB17_113: -; RV32I-NEXT: mv a3, s4 -; RV32I-NEXT: beq a5, s8, .LBB17_95 +; RV32I-NEXT: mv a3, s3 +; RV32I-NEXT: beq a4, s8, .LBB17_95 ; RV32I-NEXT: j .LBB17_96 ; RV32I-NEXT: .LBB17_114: ; RV32I-NEXT: or t1, a7, t1 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB17_116 +; RV32I-NEXT: beqz t5, .LBB17_116 ; RV32I-NEXT: .LBB17_115: ; RV32I-NEXT: mv a3, s10 ; RV32I-NEXT: .LBB17_116: -; RV32I-NEXT: beq a5, s5, .LBB17_138 +; RV32I-NEXT: beq a4, s4, .LBB17_138 ; RV32I-NEXT: # %bb.117: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB17_139 +; RV32I-NEXT: bnez t5, .LBB17_139 ; RV32I-NEXT: .LBB17_118: -; RV32I-NEXT: beq a5, s0, .LBB17_140 +; RV32I-NEXT: beq a4, t6, .LBB17_140 ; RV32I-NEXT: .LBB17_119: ; RV32I-NEXT: li a3, 0 -; 
RV32I-NEXT: bnez t6, .LBB17_141 +; RV32I-NEXT: bnez t5, .LBB17_141 ; RV32I-NEXT: .LBB17_120: -; RV32I-NEXT: bne a5, s8, .LBB17_122 +; RV32I-NEXT: bne a4, s8, .LBB17_122 ; RV32I-NEXT: .LBB17_121: ; RV32I-NEXT: or t1, s7, a3 ; RV32I-NEXT: .LBB17_122: -; RV32I-NEXT: li s4, 1 +; RV32I-NEXT: li s3, 1 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB17_124 +; RV32I-NEXT: beqz t5, .LBB17_124 ; RV32I-NEXT: # %bb.123: ; RV32I-NEXT: lw a3, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB17_124: -; RV32I-NEXT: li s5, 3 +; RV32I-NEXT: li s4, 3 ; RV32I-NEXT: li s8, 2 -; RV32I-NEXT: li t3, 4 -; RV32I-NEXT: bne a5, t3, .LBB17_126 +; RV32I-NEXT: li t2, 4 +; RV32I-NEXT: bne a4, t2, .LBB17_126 ; RV32I-NEXT: # %bb.125: ; RV32I-NEXT: lw t1, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: or t1, t1, a3 ; RV32I-NEXT: .LBB17_126: -; RV32I-NEXT: lbu s0, 27(a0) -; RV32I-NEXT: mv t3, t5 -; RV32I-NEXT: beq a5, s1, .LBB17_128 +; RV32I-NEXT: lbu t6, 27(a0) +; RV32I-NEXT: mv t2, t4 +; RV32I-NEXT: beq a4, s0, .LBB17_128 ; RV32I-NEXT: # %bb.127: -; RV32I-NEXT: mv t3, t1 +; RV32I-NEXT: mv t2, t1 ; RV32I-NEXT: .LBB17_128: ; RV32I-NEXT: li t1, 0 -; RV32I-NEXT: lbu t4, 25(a0) +; RV32I-NEXT: lbu t3, 25(a0) ; RV32I-NEXT: lbu a3, 26(a0) -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: li s1, 6 -; RV32I-NEXT: beq a5, s1, .LBB17_130 +; RV32I-NEXT: slli t6, t6, 8 +; RV32I-NEXT: li s0, 6 +; RV32I-NEXT: beq a4, s0, .LBB17_130 ; RV32I-NEXT: # %bb.129: -; RV32I-NEXT: mv t1, t3 +; RV32I-NEXT: mv t1, t2 ; RV32I-NEXT: .LBB17_130: -; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: lbu s1, 24(a0) -; RV32I-NEXT: or s0, s0, a3 -; RV32I-NEXT: slli a3, t4, 8 -; RV32I-NEXT: beq a5, ra, .LBB17_132 +; RV32I-NEXT: li t2, 0 +; RV32I-NEXT: lbu s0, 24(a0) +; RV32I-NEXT: or t6, t6, a3 +; RV32I-NEXT: slli a3, t3, 8 +; RV32I-NEXT: beq a4, ra, .LBB17_132 ; RV32I-NEXT: # %bb.131: -; RV32I-NEXT: mv t3, t1 +; RV32I-NEXT: mv t2, t1 ; RV32I-NEXT: .LBB17_132: -; RV32I-NEXT: or a3, a3, s1 -; RV32I-NEXT: slli s0, s0, 16 +; RV32I-NEXT: or a3, a3, s0 +; 
RV32I-NEXT: slli t6, t6, 16 ; RV32I-NEXT: mv ra, t0 ; RV32I-NEXT: beqz a1, .LBB17_134 ; RV32I-NEXT: # %bb.133: -; RV32I-NEXT: mv ra, t3 +; RV32I-NEXT: mv ra, t2 ; RV32I-NEXT: .LBB17_134: -; RV32I-NEXT: li t4, 0 -; RV32I-NEXT: or t3, s0, a3 -; RV32I-NEXT: srl t0, t0, s3 -; RV32I-NEXT: li s0, 5 -; RV32I-NEXT: beqz t6, .LBB17_136 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: or t2, t6, a3 +; RV32I-NEXT: srl t0, t0, s2 +; RV32I-NEXT: li t6, 5 +; RV32I-NEXT: beqz t5, .LBB17_136 ; RV32I-NEXT: # %bb.135: -; RV32I-NEXT: mv t4, t0 +; RV32I-NEXT: mv t3, t0 ; RV32I-NEXT: .LBB17_136: -; RV32I-NEXT: sll t1, t3, a1 -; RV32I-NEXT: beqz a5, .LBB17_142 +; RV32I-NEXT: sll t1, t2, a1 +; RV32I-NEXT: beqz a4, .LBB17_142 ; RV32I-NEXT: # %bb.137: -; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: li t3, 0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB17_143 +; RV32I-NEXT: bnez t5, .LBB17_143 ; RV32I-NEXT: j .LBB17_144 ; RV32I-NEXT: .LBB17_138: ; RV32I-NEXT: or t1, s11, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB17_118 +; RV32I-NEXT: beqz t5, .LBB17_118 ; RV32I-NEXT: .LBB17_139: ; RV32I-NEXT: lw a3, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne a5, s0, .LBB17_119 +; RV32I-NEXT: bne a4, t6, .LBB17_119 ; RV32I-NEXT: .LBB17_140: ; RV32I-NEXT: lw t1, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: or t1, t1, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB17_120 +; RV32I-NEXT: beqz t5, .LBB17_120 ; RV32I-NEXT: .LBB17_141: ; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: beq a5, s8, .LBB17_121 +; RV32I-NEXT: beq a4, s8, .LBB17_121 ; RV32I-NEXT: j .LBB17_122 ; RV32I-NEXT: .LBB17_142: -; RV32I-NEXT: or t4, t1, t4 +; RV32I-NEXT: or t3, t1, t3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB17_144 +; RV32I-NEXT: beqz t5, .LBB17_144 ; RV32I-NEXT: .LBB17_143: -; RV32I-NEXT: mv a3, a6 +; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: .LBB17_144: -; RV32I-NEXT: beq a5, s4, .LBB17_164 +; RV32I-NEXT: beq a4, s3, .LBB17_164 ; RV32I-NEXT: # %bb.145: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez 
t6, .LBB17_165 +; RV32I-NEXT: bnez t5, .LBB17_165 ; RV32I-NEXT: .LBB17_146: -; RV32I-NEXT: beq a5, s8, .LBB17_166 +; RV32I-NEXT: beq a4, s8, .LBB17_166 ; RV32I-NEXT: .LBB17_147: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB17_167 +; RV32I-NEXT: bnez t5, .LBB17_167 ; RV32I-NEXT: .LBB17_148: -; RV32I-NEXT: beq a5, s5, .LBB17_168 +; RV32I-NEXT: beq a4, s4, .LBB17_168 ; RV32I-NEXT: .LBB17_149: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB17_169 +; RV32I-NEXT: bnez t5, .LBB17_169 ; RV32I-NEXT: .LBB17_150: -; RV32I-NEXT: li s1, 4 -; RV32I-NEXT: beq a5, s1, .LBB17_170 +; RV32I-NEXT: li s0, 4 +; RV32I-NEXT: beq a4, s0, .LBB17_170 ; RV32I-NEXT: .LBB17_151: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB17_171 +; RV32I-NEXT: bnez t5, .LBB17_171 ; RV32I-NEXT: .LBB17_152: -; RV32I-NEXT: bne a5, s0, .LBB17_154 +; RV32I-NEXT: bne a4, t6, .LBB17_154 ; RV32I-NEXT: .LBB17_153: -; RV32I-NEXT: lw t4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t4, t4, a3 +; RV32I-NEXT: lw t3, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t3, t3, a3 ; RV32I-NEXT: .LBB17_154: -; RV32I-NEXT: lbu s0, 31(a0) -; RV32I-NEXT: mv a3, t5 -; RV32I-NEXT: li s1, 6 -; RV32I-NEXT: beq a5, s1, .LBB17_156 -; RV32I-NEXT: # %bb.155: +; RV32I-NEXT: lbu t6, 31(a0) ; RV32I-NEXT: mv a3, t4 +; RV32I-NEXT: li s0, 6 +; RV32I-NEXT: beq a4, s0, .LBB17_156 +; RV32I-NEXT: # %bb.155: +; RV32I-NEXT: mv a3, t3 ; RV32I-NEXT: .LBB17_156: -; RV32I-NEXT: li t4, 0 -; RV32I-NEXT: lbu s5, 29(a0) -; RV32I-NEXT: lbu s1, 30(a0) -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: li s4, 7 -; RV32I-NEXT: beq a5, s4, .LBB17_158 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: lbu s4, 29(a0) +; RV32I-NEXT: lbu s0, 30(a0) +; RV32I-NEXT: slli t6, t6, 8 +; RV32I-NEXT: li s3, 7 +; RV32I-NEXT: beq a4, s3, .LBB17_158 ; RV32I-NEXT: # %bb.157: -; RV32I-NEXT: mv t4, a3 +; RV32I-NEXT: mv t3, a3 ; RV32I-NEXT: .LBB17_158: ; RV32I-NEXT: lbu a3, 28(a0) -; RV32I-NEXT: slli s5, s5, 8 -; RV32I-NEXT: or s0, s0, s1 -; RV32I-NEXT: mv a0, t3 +; RV32I-NEXT: slli 
s4, s4, 8 +; RV32I-NEXT: or t6, t6, s0 +; RV32I-NEXT: mv a0, t2 ; RV32I-NEXT: beqz a1, .LBB17_160 ; RV32I-NEXT: # %bb.159: -; RV32I-NEXT: mv a0, t4 +; RV32I-NEXT: mv a0, t3 ; RV32I-NEXT: .LBB17_160: -; RV32I-NEXT: li t4, 0 -; RV32I-NEXT: or a3, s5, a3 -; RV32I-NEXT: slli s0, s0, 16 -; RV32I-NEXT: li s1, 5 -; RV32I-NEXT: li s4, 4 -; RV32I-NEXT: beqz t6, .LBB17_162 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: or a3, s4, a3 +; RV32I-NEXT: slli t6, t6, 16 +; RV32I-NEXT: li s0, 5 +; RV32I-NEXT: li s3, 4 +; RV32I-NEXT: beqz t5, .LBB17_162 ; RV32I-NEXT: # %bb.161: -; RV32I-NEXT: srl t4, t3, s3 +; RV32I-NEXT: srl t3, t2, s2 ; RV32I-NEXT: .LBB17_162: -; RV32I-NEXT: or s3, s0, a3 -; RV32I-NEXT: li s0, 6 -; RV32I-NEXT: li s5, 1 -; RV32I-NEXT: beqz a5, .LBB17_172 +; RV32I-NEXT: or s2, t6, a3 +; RV32I-NEXT: li t6, 6 +; RV32I-NEXT: li s4, 1 +; RV32I-NEXT: beqz a4, .LBB17_172 ; RV32I-NEXT: # %bb.163: -; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: li t2, 0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB17_173 +; RV32I-NEXT: bnez t5, .LBB17_173 ; RV32I-NEXT: j .LBB17_174 ; RV32I-NEXT: .LBB17_164: -; RV32I-NEXT: or t4, a7, a3 +; RV32I-NEXT: or t3, a7, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB17_146 +; RV32I-NEXT: beqz t5, .LBB17_146 ; RV32I-NEXT: .LBB17_165: ; RV32I-NEXT: mv a3, s10 -; RV32I-NEXT: bne a5, s8, .LBB17_147 +; RV32I-NEXT: bne a4, s8, .LBB17_147 ; RV32I-NEXT: .LBB17_166: -; RV32I-NEXT: or t4, s11, a3 +; RV32I-NEXT: or t3, s11, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB17_148 +; RV32I-NEXT: beqz t5, .LBB17_148 ; RV32I-NEXT: .LBB17_167: ; RV32I-NEXT: lw a3, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne a5, s5, .LBB17_149 +; RV32I-NEXT: bne a4, s4, .LBB17_149 ; RV32I-NEXT: .LBB17_168: -; RV32I-NEXT: lw t4, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t4, t4, a3 +; RV32I-NEXT: lw t3, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t3, t3, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB17_150 +; RV32I-NEXT: beqz t5, .LBB17_150 ; RV32I-NEXT: 
.LBB17_169: ; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: li s1, 4 -; RV32I-NEXT: bne a5, s1, .LBB17_151 +; RV32I-NEXT: li s0, 4 +; RV32I-NEXT: bne a4, s0, .LBB17_151 ; RV32I-NEXT: .LBB17_170: -; RV32I-NEXT: or t4, s7, a3 +; RV32I-NEXT: or t3, s7, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB17_152 +; RV32I-NEXT: beqz t5, .LBB17_152 ; RV32I-NEXT: .LBB17_171: ; RV32I-NEXT: lw a3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: beq a5, s0, .LBB17_153 +; RV32I-NEXT: beq a4, t6, .LBB17_153 ; RV32I-NEXT: j .LBB17_154 ; RV32I-NEXT: .LBB17_172: -; RV32I-NEXT: sll a3, s3, a1 -; RV32I-NEXT: or t3, a3, t4 +; RV32I-NEXT: sll a3, s2, a1 +; RV32I-NEXT: or t2, a3, t3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB17_174 +; RV32I-NEXT: beqz t5, .LBB17_174 ; RV32I-NEXT: .LBB17_173: ; RV32I-NEXT: mv a3, t0 ; RV32I-NEXT: .LBB17_174: -; RV32I-NEXT: beq a5, s5, .LBB17_190 +; RV32I-NEXT: beq a4, s4, .LBB17_190 ; RV32I-NEXT: # %bb.175: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB17_191 +; RV32I-NEXT: bnez t5, .LBB17_191 ; RV32I-NEXT: .LBB17_176: -; RV32I-NEXT: bne a5, s8, .LBB17_178 +; RV32I-NEXT: bne a4, s8, .LBB17_178 ; RV32I-NEXT: .LBB17_177: -; RV32I-NEXT: or t3, a7, a3 +; RV32I-NEXT: or t2, a7, a3 ; RV32I-NEXT: .LBB17_178: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: li a6, 3 -; RV32I-NEXT: bnez t6, .LBB17_192 +; RV32I-NEXT: li a5, 3 +; RV32I-NEXT: bnez t5, .LBB17_192 ; RV32I-NEXT: # %bb.179: -; RV32I-NEXT: beq a5, a6, .LBB17_193 +; RV32I-NEXT: beq a4, a5, .LBB17_193 ; RV32I-NEXT: .LBB17_180: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB17_194 +; RV32I-NEXT: bnez t5, .LBB17_194 ; RV32I-NEXT: .LBB17_181: -; RV32I-NEXT: beq a5, s4, .LBB17_195 +; RV32I-NEXT: beq a4, s3, .LBB17_195 ; RV32I-NEXT: .LBB17_182: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t6, .LBB17_196 +; RV32I-NEXT: bnez t5, .LBB17_196 ; RV32I-NEXT: .LBB17_183: -; RV32I-NEXT: beq a5, s1, .LBB17_197 +; RV32I-NEXT: beq a4, s0, .LBB17_197 ; RV32I-NEXT: .LBB17_184: ; RV32I-NEXT: li a3, 0 -; 
RV32I-NEXT: bnez t6, .LBB17_198 +; RV32I-NEXT: bnez t5, .LBB17_198 ; RV32I-NEXT: .LBB17_185: -; RV32I-NEXT: beq a5, s0, .LBB17_199 +; RV32I-NEXT: beq a4, t6, .LBB17_199 ; RV32I-NEXT: .LBB17_186: ; RV32I-NEXT: li a3, 7 -; RV32I-NEXT: bne a5, a3, .LBB17_200 +; RV32I-NEXT: bne a4, a3, .LBB17_200 ; RV32I-NEXT: .LBB17_187: ; RV32I-NEXT: beqz a1, .LBB17_189 ; RV32I-NEXT: .LBB17_188: -; RV32I-NEXT: mv s3, t5 +; RV32I-NEXT: mv s2, t4 ; RV32I-NEXT: .LBB17_189: -; RV32I-NEXT: srli a1, a4, 16 -; RV32I-NEXT: lui a7, 16 -; RV32I-NEXT: srli a6, a4, 24 -; RV32I-NEXT: srli a3, t2, 16 -; RV32I-NEXT: srli t1, t2, 24 -; RV32I-NEXT: srli a5, s2, 16 -; RV32I-NEXT: srli t5, s2, 24 -; RV32I-NEXT: srli t0, s6, 16 -; RV32I-NEXT: srli t6, s6, 24 -; RV32I-NEXT: srli t4, s9, 16 -; RV32I-NEXT: srli s4, s9, 24 -; RV32I-NEXT: srli t3, ra, 16 -; RV32I-NEXT: srli s1, ra, 24 -; RV32I-NEXT: srli s0, a0, 16 -; RV32I-NEXT: srli s5, a0, 24 -; RV32I-NEXT: srli s7, s3, 16 -; RV32I-NEXT: srli s8, s3, 24 -; RV32I-NEXT: addi a7, a7, -1 -; RV32I-NEXT: and s10, a4, a7 -; RV32I-NEXT: and s11, t2, a7 -; RV32I-NEXT: srli s10, s10, 8 -; RV32I-NEXT: sb a4, 0(a2) -; RV32I-NEXT: sb s10, 1(a2) -; RV32I-NEXT: sb a1, 2(a2) -; RV32I-NEXT: sb a6, 3(a2) -; RV32I-NEXT: and a1, s2, a7 -; RV32I-NEXT: srli a4, s11, 8 -; RV32I-NEXT: sb t2, 4(a2) +; RV32I-NEXT: lui a1, 16 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: srli a3, a6, 16 +; RV32I-NEXT: and a4, a6, a1 +; RV32I-NEXT: srli a5, a6, 24 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: sb a6, 0(a2) +; RV32I-NEXT: sb a4, 1(a2) +; RV32I-NEXT: sb a3, 2(a2) +; RV32I-NEXT: sb a5, 3(a2) +; RV32I-NEXT: srli a3, s1, 16 +; RV32I-NEXT: and a4, s1, a1 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a5, s1, 24 +; RV32I-NEXT: sb s1, 4(a2) ; RV32I-NEXT: sb a4, 5(a2) ; RV32I-NEXT: sb a3, 6(a2) -; RV32I-NEXT: sb t1, 7(a2) -; RV32I-NEXT: and a3, s6, a7 -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb s2, 8(a2) -; RV32I-NEXT: sb a1, 9(a2) -; RV32I-NEXT: sb a5, 10(a2) -; RV32I-NEXT: sb t5, 
11(a2) -; RV32I-NEXT: and a1, s9, a7 -; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: sb a5, 7(a2) +; RV32I-NEXT: srli a3, s5, 16 +; RV32I-NEXT: and a4, s5, a1 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a5, s5, 24 +; RV32I-NEXT: sb s5, 8(a2) +; RV32I-NEXT: sb a4, 9(a2) +; RV32I-NEXT: sb a3, 10(a2) +; RV32I-NEXT: sb a5, 11(a2) +; RV32I-NEXT: srli a3, s6, 16 +; RV32I-NEXT: and a4, s6, a1 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a5, s6, 24 ; RV32I-NEXT: sb s6, 12(a2) -; RV32I-NEXT: sb a3, 13(a2) -; RV32I-NEXT: sb t0, 14(a2) -; RV32I-NEXT: sb t6, 15(a2) -; RV32I-NEXT: and a3, ra, a7 -; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a4, 13(a2) +; RV32I-NEXT: sb a3, 14(a2) +; RV32I-NEXT: sb a5, 15(a2) +; RV32I-NEXT: srli a3, s9, 16 +; RV32I-NEXT: and a4, s9, a1 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a5, s9, 24 ; RV32I-NEXT: sb s9, 16(a2) -; RV32I-NEXT: sb a1, 17(a2) -; RV32I-NEXT: sb t4, 18(a2) -; RV32I-NEXT: sb s4, 19(a2) -; RV32I-NEXT: and a1, a0, a7 -; RV32I-NEXT: and a4, s3, a7 -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a4, 17(a2) +; RV32I-NEXT: sb a3, 18(a2) +; RV32I-NEXT: sb a5, 19(a2) +; RV32I-NEXT: srli a3, ra, 16 +; RV32I-NEXT: and a4, ra, a1 ; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a5, ra, 24 ; RV32I-NEXT: sb ra, 20(a2) -; RV32I-NEXT: sb a3, 21(a2) -; RV32I-NEXT: sb t3, 22(a2) -; RV32I-NEXT: sb s1, 23(a2) +; RV32I-NEXT: sb a4, 21(a2) +; RV32I-NEXT: sb a3, 22(a2) +; RV32I-NEXT: sb a5, 23(a2) +; RV32I-NEXT: srli a3, a0, 16 +; RV32I-NEXT: and a4, a0, a1 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a5, a0, 24 ; RV32I-NEXT: sb a0, 24(a2) -; RV32I-NEXT: sb a1, 25(a2) -; RV32I-NEXT: sb s0, 26(a2) -; RV32I-NEXT: sb s5, 27(a2) -; RV32I-NEXT: sb s3, 28(a2) -; RV32I-NEXT: sb a4, 29(a2) -; RV32I-NEXT: sb s7, 30(a2) -; RV32I-NEXT: sb s8, 31(a2) +; RV32I-NEXT: sb a4, 25(a2) +; RV32I-NEXT: sb a3, 26(a2) +; RV32I-NEXT: sb a5, 27(a2) +; RV32I-NEXT: srli a0, s2, 16 +; RV32I-NEXT: and a1, s2, a1 +; RV32I-NEXT: 
srli a1, a1, 8 +; RV32I-NEXT: srli a3, s2, 24 +; RV32I-NEXT: sb s2, 28(a2) +; RV32I-NEXT: sb a1, 29(a2) +; RV32I-NEXT: sb a0, 30(a2) +; RV32I-NEXT: sb a3, 31(a2) ; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 68(sp) # 4-byte Folded Reload @@ -9439,45 +9386,45 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV32I-NEXT: addi sp, sp, 80 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB17_190: -; RV32I-NEXT: or t3, t1, a3 +; RV32I-NEXT: or t2, t1, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB17_176 +; RV32I-NEXT: beqz t5, .LBB17_176 ; RV32I-NEXT: .LBB17_191: -; RV32I-NEXT: mv a3, a6 -; RV32I-NEXT: beq a5, s8, .LBB17_177 +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: beq a4, s8, .LBB17_177 ; RV32I-NEXT: j .LBB17_178 ; RV32I-NEXT: .LBB17_192: ; RV32I-NEXT: mv a3, s10 -; RV32I-NEXT: bne a5, a6, .LBB17_180 +; RV32I-NEXT: bne a4, a5, .LBB17_180 ; RV32I-NEXT: .LBB17_193: -; RV32I-NEXT: or t3, s11, a3 +; RV32I-NEXT: or t2, s11, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB17_181 +; RV32I-NEXT: beqz t5, .LBB17_181 ; RV32I-NEXT: .LBB17_194: ; RV32I-NEXT: lw a3, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne a5, s4, .LBB17_182 +; RV32I-NEXT: bne a4, s3, .LBB17_182 ; RV32I-NEXT: .LBB17_195: -; RV32I-NEXT: lw a6, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t3, a6, a3 +; RV32I-NEXT: lw a5, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t2, a5, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB17_183 +; RV32I-NEXT: beqz t5, .LBB17_183 ; RV32I-NEXT: .LBB17_196: ; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne a5, s1, .LBB17_184 +; RV32I-NEXT: bne a4, s0, .LBB17_184 ; RV32I-NEXT: .LBB17_197: -; RV32I-NEXT: or t3, s7, a3 +; RV32I-NEXT: or t2, s7, a3 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t6, .LBB17_185 +; RV32I-NEXT: beqz t5, .LBB17_185 ; RV32I-NEXT: .LBB17_198: ; RV32I-NEXT: lw a3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne a5, s0, 
.LBB17_186 +; RV32I-NEXT: bne a4, t6, .LBB17_186 ; RV32I-NEXT: .LBB17_199: -; RV32I-NEXT: lw a6, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t3, a6, a3 +; RV32I-NEXT: lw a5, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t2, a5, a3 ; RV32I-NEXT: li a3, 7 -; RV32I-NEXT: beq a5, a3, .LBB17_187 +; RV32I-NEXT: beq a4, a3, .LBB17_187 ; RV32I-NEXT: .LBB17_200: -; RV32I-NEXT: mv t5, t3 +; RV32I-NEXT: mv t4, t2 ; RV32I-NEXT: bnez a1, .LBB17_188 ; RV32I-NEXT: j .LBB17_189 %src = load i256, ptr %src.ptr, align 1 @@ -9491,1201 +9438,1175 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: ashr_32bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -96 -; RV64I-NEXT: sd s0, 88(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 80(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 72(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 64(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s5, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s6, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s7, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s8, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s9, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s10, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s11, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: lbu a4, 1(a0) -; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: lbu a6, 3(a0) -; RV64I-NEXT: lbu a7, 4(a0) +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd s0, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a5, 3(a0) +; RV64I-NEXT: lbu a6, 2(a0) +; RV64I-NEXT: lbu a7, 7(a0) ; RV64I-NEXT: lbu t0, 5(a0) ; 
RV64I-NEXT: lbu t1, 6(a0) -; RV64I-NEXT: lbu t2, 7(a0) -; RV64I-NEXT: lbu t3, 8(a0) -; RV64I-NEXT: lbu t4, 9(a0) -; RV64I-NEXT: lbu t5, 10(a0) -; RV64I-NEXT: lbu t6, 11(a0) -; RV64I-NEXT: lbu s0, 12(a0) -; RV64I-NEXT: lbu s1, 13(a0) -; RV64I-NEXT: lbu s2, 14(a0) -; RV64I-NEXT: lbu s3, 15(a0) -; RV64I-NEXT: lbu s4, 16(a0) -; RV64I-NEXT: lbu s5, 17(a0) -; RV64I-NEXT: lbu s6, 18(a0) -; RV64I-NEXT: lbu s7, 19(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: lbu t2, 4(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a3, a4 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 8 ; RV64I-NEXT: slli t0, t0, 8 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: lbu s8, 20(a0) -; RV64I-NEXT: lbu s9, 21(a0) -; RV64I-NEXT: lbu s10, 22(a0) -; RV64I-NEXT: lbu s11, 23(a0) -; RV64I-NEXT: slli t2, t2, 8 +; RV64I-NEXT: or a3, a7, t1 +; RV64I-NEXT: lbu a7, 8(a0) +; RV64I-NEXT: lbu t1, 9(a0) +; RV64I-NEXT: lbu t3, 10(a0) +; RV64I-NEXT: lbu t4, 11(a0) +; RV64I-NEXT: lbu t5, 12(a0) +; RV64I-NEXT: lbu t6, 13(a0) +; RV64I-NEXT: lbu s0, 14(a0) +; RV64I-NEXT: lbu s1, 15(a0) +; RV64I-NEXT: or a6, t0, t2 +; RV64I-NEXT: slli a3, a3, 16 +; RV64I-NEXT: or a6, a3, a6 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, a7 ; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: slli t6, t6, 8 +; RV64I-NEXT: or a7, t4, t3 ; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or t1, t2, t1 -; RV64I-NEXT: or a4, t4, t3 -; RV64I-NEXT: or a6, t6, t5 -; RV64I-NEXT: or t0, s1, s0 -; RV64I-NEXT: lbu t5, 24(a0) -; RV64I-NEXT: lbu t6, 25(a0) -; RV64I-NEXT: lbu s0, 26(a0) -; RV64I-NEXT: lbu s1, 27(a0) -; RV64I-NEXT: slli s3, s3, 8 -; RV64I-NEXT: slli s5, s5, 8 -; RV64I-NEXT: slli s7, s7, 8 -; RV64I-NEXT: or t4, s3, s2 -; RV64I-NEXT: or t2, s5, s4 -; RV64I-NEXT: or t3, s7, s6 -; RV64I-NEXT: lbu s2, 28(a0) -; RV64I-NEXT: lbu s3, 29(a0) -; RV64I-NEXT: lbu s4, 30(a0) -; RV64I-NEXT: lbu a0, 31(a0) -; RV64I-NEXT: 
slli s9, s9, 8 -; RV64I-NEXT: slli s11, s11, 8 ; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s5, s9, s8 -; RV64I-NEXT: or s6, s11, s10 -; RV64I-NEXT: or t5, t6, t5 ; RV64I-NEXT: or s0, s1, s0 -; RV64I-NEXT: lbu t6, 0(a1) -; RV64I-NEXT: lbu s1, 1(a1) -; RV64I-NEXT: lbu s7, 2(a1) -; RV64I-NEXT: lbu s8, 3(a1) -; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: lbu a3, 16(a0) +; RV64I-NEXT: lbu t1, 17(a0) +; RV64I-NEXT: lbu t3, 18(a0) +; RV64I-NEXT: lbu t4, 19(a0) +; RV64I-NEXT: lbu s1, 20(a0) +; RV64I-NEXT: lbu s2, 21(a0) +; RV64I-NEXT: lbu s3, 22(a0) +; RV64I-NEXT: lbu s4, 23(a0) +; RV64I-NEXT: or t2, t6, t5 +; RV64I-NEXT: slli s0, s0, 16 +; RV64I-NEXT: or t2, s0, t2 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or a3, t1, a3 +; RV64I-NEXT: slli t4, t4, 8 +; RV64I-NEXT: or t1, t4, t3 +; RV64I-NEXT: slli s4, s4, 8 +; RV64I-NEXT: slli s2, s2, 8 +; RV64I-NEXT: or t3, s4, s3 +; RV64I-NEXT: or t4, s2, s1 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: or t3, t3, t4 +; RV64I-NEXT: lbu t4, 24(a0) +; RV64I-NEXT: lbu t5, 25(a0) +; RV64I-NEXT: lbu t6, 26(a0) +; RV64I-NEXT: lbu s0, 27(a0) +; RV64I-NEXT: or a3, t1, a3 +; RV64I-NEXT: slli t3, t3, 32 +; RV64I-NEXT: lbu t1, 28(a0) +; RV64I-NEXT: lbu s1, 29(a0) +; RV64I-NEXT: lbu s2, 30(a0) +; RV64I-NEXT: lbu a0, 31(a0) +; RV64I-NEXT: or a3, t3, a3 +; RV64I-NEXT: slli s0, s0, 8 +; RV64I-NEXT: slli t5, t5, 8 +; RV64I-NEXT: or t3, s0, t6 +; RV64I-NEXT: or t4, t5, t4 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: or t3, t3, t4 ; RV64I-NEXT: slli a0, a0, 8 ; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s2, s3, s2 -; RV64I-NEXT: or s3, a0, s4 -; RV64I-NEXT: or t6, s1, t6 -; RV64I-NEXT: lbu a0, 4(a1) -; RV64I-NEXT: lbu s1, 5(a1) -; RV64I-NEXT: lbu s4, 6(a1) +; RV64I-NEXT: or a0, a0, s2 +; RV64I-NEXT: or t1, s1, t1 +; RV64I-NEXT: slli a0, a0, 16 +; RV64I-NEXT: lbu t4, 0(a1) +; RV64I-NEXT: lbu t5, 1(a1) +; RV64I-NEXT: lbu t6, 2(a1) +; RV64I-NEXT: lbu s0, 3(a1) +; RV64I-NEXT: lbu s1, 4(a1) 
+; RV64I-NEXT: lbu s2, 5(a1) +; RV64I-NEXT: lbu s3, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli s8, s8, 8 -; RV64I-NEXT: or s7, s8, s7 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s1, s1, a0 +; RV64I-NEXT: or t1, a0, t1 +; RV64I-NEXT: slli t5, t5, 8 +; RV64I-NEXT: or t4, t5, t4 +; RV64I-NEXT: slli s0, s0, 8 +; RV64I-NEXT: or t5, s0, t6 +; RV64I-NEXT: slli s2, s2, 8 +; RV64I-NEXT: or t6, s2, s1 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or s4, a1, s4 -; RV64I-NEXT: slli t1, t1, 16 -; RV64I-NEXT: or a1, t1, a7 -; RV64I-NEXT: slli t4, t4, 16 -; RV64I-NEXT: or a0, t4, t0 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: or a7, t3, t2 -; RV64I-NEXT: slli s6, s6, 16 -; RV64I-NEXT: or t1, s6, s5 +; RV64I-NEXT: or s0, a1, s3 +; RV64I-NEXT: slli a1, a5, 16 +; RV64I-NEXT: slli a0, a7, 16 ; RV64I-NEXT: slli s0, s0, 16 -; RV64I-NEXT: or t4, s0, t5 -; RV64I-NEXT: slli s3, s3, 16 -; RV64I-NEXT: or t5, s3, s2 -; RV64I-NEXT: slli s7, s7, 16 -; RV64I-NEXT: or t6, s7, t6 -; RV64I-NEXT: slli s4, s4, 16 -; RV64I-NEXT: or s0, s4, s1 -; RV64I-NEXT: li t0, 64 -; RV64I-NEXT: slli t3, a5, 16 -; RV64I-NEXT: slli t2, a6, 16 -; RV64I-NEXT: slli t1, t1, 32 -; RV64I-NEXT: slli t5, t5, 32 -; RV64I-NEXT: slli s0, s0, 32 -; RV64I-NEXT: or a7, t1, a7 -; RV64I-NEXT: or a5, t5, t4 -; RV64I-NEXT: or a6, s0, t6 -; RV64I-NEXT: slli a6, a6, 3 -; RV64I-NEXT: sub t1, a6, t0 -; RV64I-NEXT: neg t5, a6 -; RV64I-NEXT: sll t4, a5, t5 -; RV64I-NEXT: bltu a6, t0, .LBB18_2 +; RV64I-NEXT: slli t5, t5, 16 +; RV64I-NEXT: or a5, s0, t6 +; RV64I-NEXT: or a7, t5, t4 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: slli t1, t1, 32 +; RV64I-NEXT: or a7, a5, a7 +; RV64I-NEXT: or a5, t1, t3 +; RV64I-NEXT: slli a7, a7, 3 +; RV64I-NEXT: li t1, 64 +; RV64I-NEXT: neg t3, a7 +; RV64I-NEXT: sub t4, a7, t1 +; RV64I-NEXT: sll t5, a5, t3 +; RV64I-NEXT: bltu a7, t1, .LBB18_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: sra t6, a5, t1 +; RV64I-NEXT: sra t6, a5, t4 ; RV64I-NEXT: j .LBB18_3 ; RV64I-NEXT: .LBB18_2: -; RV64I-NEXT: srl t6, 
a7, a6 -; RV64I-NEXT: or t6, t6, t4 +; RV64I-NEXT: srl t6, a3, a7 +; RV64I-NEXT: or t6, t6, t5 ; RV64I-NEXT: .LBB18_3: -; RV64I-NEXT: or a3, t3, a3 -; RV64I-NEXT: slli t3, a1, 32 -; RV64I-NEXT: or t2, t2, a4 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: mv a1, a7 -; RV64I-NEXT: beqz a6, .LBB18_5 +; RV64I-NEXT: or a4, a1, a4 +; RV64I-NEXT: slli a6, a6, 32 +; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: slli t2, t2, 32 +; RV64I-NEXT: mv a1, a3 +; RV64I-NEXT: beqz a7, .LBB18_5 ; RV64I-NEXT: # %bb.4: ; RV64I-NEXT: mv a1, t6 ; RV64I-NEXT: .LBB18_5: -; RV64I-NEXT: or a4, t3, a3 -; RV64I-NEXT: or a3, a0, t2 -; RV64I-NEXT: bltu a6, t0, .LBB18_7 +; RV64I-NEXT: or a6, a6, a4 +; RV64I-NEXT: or a4, t2, a0 +; RV64I-NEXT: bltu a7, t1, .LBB18_7 ; RV64I-NEXT: # %bb.6: ; RV64I-NEXT: srai a0, a5, 63 -; RV64I-NEXT: srl t3, a3, t1 +; RV64I-NEXT: srl t4, a4, t4 ; RV64I-NEXT: j .LBB18_8 ; RV64I-NEXT: .LBB18_7: -; RV64I-NEXT: sra a0, a5, a6 -; RV64I-NEXT: srl t1, a4, a6 -; RV64I-NEXT: sll t2, a3, t5 -; RV64I-NEXT: or t3, t1, t2 +; RV64I-NEXT: srl t0, a6, a7 +; RV64I-NEXT: sll t2, a4, t3 +; RV64I-NEXT: sra a0, a5, a7 +; RV64I-NEXT: or t4, t0, t2 ; RV64I-NEXT: .LBB18_8: -; RV64I-NEXT: li t1, 128 -; RV64I-NEXT: mv t2, a4 -; RV64I-NEXT: beqz a6, .LBB18_10 +; RV64I-NEXT: li t0, 128 +; RV64I-NEXT: mv t2, a6 +; RV64I-NEXT: beqz a7, .LBB18_10 ; RV64I-NEXT: # %bb.9: -; RV64I-NEXT: mv t2, t3 +; RV64I-NEXT: mv t2, t4 ; RV64I-NEXT: .LBB18_10: -; RV64I-NEXT: sub t6, t1, a6 -; RV64I-NEXT: bltu a6, t0, .LBB18_13 +; RV64I-NEXT: sub t6, t0, a7 +; RV64I-NEXT: bltu a7, t1, .LBB18_13 ; RV64I-NEXT: # %bb.11: -; RV64I-NEXT: li t3, 0 -; RV64I-NEXT: bgeu t6, t0, .LBB18_14 +; RV64I-NEXT: li t4, 0 +; RV64I-NEXT: bgeu t6, t1, .LBB18_14 ; RV64I-NEXT: .LBB18_12: -; RV64I-NEXT: sll t5, a7, t5 ; RV64I-NEXT: neg s0, t6 -; RV64I-NEXT: srl s0, a7, s0 -; RV64I-NEXT: or s1, s0, t4 +; RV64I-NEXT: srl s0, a3, s0 +; RV64I-NEXT: sll t3, a3, t3 +; RV64I-NEXT: or s1, s0, t5 ; RV64I-NEXT: j .LBB18_15 ; RV64I-NEXT: .LBB18_13: -; 
RV64I-NEXT: srl t3, a3, a6 -; RV64I-NEXT: bltu t6, t0, .LBB18_12 +; RV64I-NEXT: srl t4, a4, a7 +; RV64I-NEXT: bltu t6, t1, .LBB18_12 ; RV64I-NEXT: .LBB18_14: -; RV64I-NEXT: li t5, 0 -; RV64I-NEXT: sub t4, t6, t0 -; RV64I-NEXT: sll s1, a7, t4 +; RV64I-NEXT: li t3, 0 +; RV64I-NEXT: sub t5, t6, t1 +; RV64I-NEXT: sll s1, a3, t5 ; RV64I-NEXT: .LBB18_15: -; RV64I-NEXT: sub s0, a6, t1 -; RV64I-NEXT: mv t4, a5 +; RV64I-NEXT: sub s0, a7, t0 +; RV64I-NEXT: mv t5, a5 ; RV64I-NEXT: beqz t6, .LBB18_17 ; RV64I-NEXT: # %bb.16: -; RV64I-NEXT: mv t4, s1 +; RV64I-NEXT: mv t5, s1 ; RV64I-NEXT: .LBB18_17: -; RV64I-NEXT: bltu s0, t0, .LBB18_19 +; RV64I-NEXT: bltu s0, t1, .LBB18_19 ; RV64I-NEXT: # %bb.18: -; RV64I-NEXT: sub t6, s0, t0 +; RV64I-NEXT: sub t6, s0, t1 ; RV64I-NEXT: sra t6, a5, t6 ; RV64I-NEXT: bnez s0, .LBB18_20 ; RV64I-NEXT: j .LBB18_21 ; RV64I-NEXT: .LBB18_19: -; RV64I-NEXT: srl t6, a7, s0 -; RV64I-NEXT: neg s1, s0 -; RV64I-NEXT: sll s1, a5, s1 -; RV64I-NEXT: or t6, t6, s1 +; RV64I-NEXT: neg t6, s0 +; RV64I-NEXT: srl s1, a3, s0 +; RV64I-NEXT: sll t6, a5, t6 +; RV64I-NEXT: or t6, s1, t6 ; RV64I-NEXT: beqz s0, .LBB18_21 ; RV64I-NEXT: .LBB18_20: -; RV64I-NEXT: mv a7, t6 +; RV64I-NEXT: mv a3, t6 ; RV64I-NEXT: .LBB18_21: -; RV64I-NEXT: bltu s0, t0, .LBB18_23 +; RV64I-NEXT: bltu s0, t1, .LBB18_23 ; RV64I-NEXT: # %bb.22: -; RV64I-NEXT: srai t0, a5, 63 -; RV64I-NEXT: bltu a6, t1, .LBB18_24 +; RV64I-NEXT: srai t1, a5, 63 +; RV64I-NEXT: bltu a7, t0, .LBB18_24 ; RV64I-NEXT: j .LBB18_25 ; RV64I-NEXT: .LBB18_23: -; RV64I-NEXT: sra t0, a5, s0 -; RV64I-NEXT: bgeu a6, t1, .LBB18_25 +; RV64I-NEXT: sra t1, a5, s0 +; RV64I-NEXT: bgeu a7, t0, .LBB18_25 ; RV64I-NEXT: .LBB18_24: -; RV64I-NEXT: or a7, t2, t5 -; RV64I-NEXT: or t0, t3, t4 +; RV64I-NEXT: or a3, t2, t3 +; RV64I-NEXT: or t1, t4, t5 ; RV64I-NEXT: .LBB18_25: -; RV64I-NEXT: bnez a6, .LBB18_29 +; RV64I-NEXT: bnez a7, .LBB18_29 ; RV64I-NEXT: # %bb.26: -; RV64I-NEXT: bltu a6, t1, .LBB18_28 +; RV64I-NEXT: bltu a7, t0, .LBB18_28 ; 
RV64I-NEXT: .LBB18_27: ; RV64I-NEXT: srai a1, a5, 63 ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: .LBB18_28: -; RV64I-NEXT: srli a5, a4, 32 -; RV64I-NEXT: srliw a6, a4, 16 -; RV64I-NEXT: lui t2, 16 -; RV64I-NEXT: srliw t1, a4, 24 -; RV64I-NEXT: srli t0, a4, 48 -; RV64I-NEXT: srli t5, a4, 56 -; RV64I-NEXT: srli a7, a3, 32 -; RV64I-NEXT: srliw t4, a3, 16 -; RV64I-NEXT: srliw s0, a3, 24 -; RV64I-NEXT: srli t6, a3, 48 -; RV64I-NEXT: srli s3, a3, 56 -; RV64I-NEXT: srli t3, a1, 32 -; RV64I-NEXT: srliw s2, a1, 16 -; RV64I-NEXT: srliw s6, a1, 24 -; RV64I-NEXT: srli s4, a1, 48 -; RV64I-NEXT: srli s7, a1, 56 -; RV64I-NEXT: srli s1, a0, 32 -; RV64I-NEXT: srliw s5, a0, 16 -; RV64I-NEXT: srliw s8, a0, 24 -; RV64I-NEXT: srli s9, a0, 48 -; RV64I-NEXT: srli s10, a0, 56 -; RV64I-NEXT: addi t2, t2, -1 -; RV64I-NEXT: and s11, a4, t2 -; RV64I-NEXT: srli s11, s11, 8 -; RV64I-NEXT: sb a4, 0(a2) -; RV64I-NEXT: sb s11, 1(a2) -; RV64I-NEXT: sb a6, 2(a2) -; RV64I-NEXT: sb t1, 3(a2) -; RV64I-NEXT: and a4, a5, t2 -; RV64I-NEXT: srli a4, a4, 8 +; RV64I-NEXT: srli a5, a6, 32 +; RV64I-NEXT: lui a3, 16 +; RV64I-NEXT: srliw a7, a6, 16 +; RV64I-NEXT: addi a3, a3, -1 +; RV64I-NEXT: srliw t0, a6, 24 +; RV64I-NEXT: and t1, a6, a3 +; RV64I-NEXT: srli t1, t1, 8 +; RV64I-NEXT: sb a6, 0(a2) +; RV64I-NEXT: sb t1, 1(a2) +; RV64I-NEXT: sb a7, 2(a2) +; RV64I-NEXT: sb t0, 3(a2) +; RV64I-NEXT: and a7, a5, a3 +; RV64I-NEXT: srli t0, a6, 48 +; RV64I-NEXT: srli a7, a7, 8 +; RV64I-NEXT: srli a6, a6, 56 ; RV64I-NEXT: sb a5, 4(a2) -; RV64I-NEXT: sb a4, 5(a2) +; RV64I-NEXT: sb a7, 5(a2) ; RV64I-NEXT: sb t0, 6(a2) -; RV64I-NEXT: sb t5, 7(a2) -; RV64I-NEXT: and a4, a3, t2 -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a3, 8(a2) -; RV64I-NEXT: sb a4, 9(a2) -; RV64I-NEXT: sb t4, 10(a2) -; RV64I-NEXT: sb s0, 11(a2) -; RV64I-NEXT: and a3, a7, t2 -; RV64I-NEXT: srli a3, a3, 8 -; RV64I-NEXT: sb a7, 12(a2) -; RV64I-NEXT: sb a3, 13(a2) -; RV64I-NEXT: sb t6, 14(a2) -; RV64I-NEXT: sb s3, 15(a2) -; RV64I-NEXT: and a3, a1, t2 -; 
RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: sb a6, 7(a2) +; RV64I-NEXT: srli a5, a4, 32 +; RV64I-NEXT: srliw a6, a4, 16 +; RV64I-NEXT: and a7, a4, a3 +; RV64I-NEXT: srli a7, a7, 8 +; RV64I-NEXT: srliw t0, a4, 24 +; RV64I-NEXT: sb a4, 8(a2) +; RV64I-NEXT: sb a7, 9(a2) +; RV64I-NEXT: sb a6, 10(a2) +; RV64I-NEXT: sb t0, 11(a2) +; RV64I-NEXT: srli a6, a4, 48 +; RV64I-NEXT: and a7, a5, a3 +; RV64I-NEXT: srli a7, a7, 8 +; RV64I-NEXT: srli a4, a4, 56 +; RV64I-NEXT: sb a5, 12(a2) +; RV64I-NEXT: sb a7, 13(a2) +; RV64I-NEXT: sb a6, 14(a2) +; RV64I-NEXT: sb a4, 15(a2) +; RV64I-NEXT: srli a4, a1, 32 +; RV64I-NEXT: and a5, a1, a3 +; RV64I-NEXT: srliw a6, a1, 16 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srliw a7, a1, 24 ; RV64I-NEXT: sb a1, 16(a2) -; RV64I-NEXT: sb a3, 17(a2) -; RV64I-NEXT: sb s2, 18(a2) -; RV64I-NEXT: sb s6, 19(a2) -; RV64I-NEXT: and a1, t3, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb t3, 20(a2) -; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: sb s4, 22(a2) -; RV64I-NEXT: sb s7, 23(a2) -; RV64I-NEXT: and a1, a0, t2 -; RV64I-NEXT: and a3, s1, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: sb a5, 17(a2) +; RV64I-NEXT: sb a6, 18(a2) +; RV64I-NEXT: sb a7, 19(a2) +; RV64I-NEXT: and a5, a4, a3 +; RV64I-NEXT: srli a6, a1, 48 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srli a1, a1, 56 +; RV64I-NEXT: sb a4, 20(a2) +; RV64I-NEXT: sb a5, 21(a2) +; RV64I-NEXT: sb a6, 22(a2) +; RV64I-NEXT: sb a1, 23(a2) +; RV64I-NEXT: srli a1, a0, 32 +; RV64I-NEXT: srliw a4, a0, 16 +; RV64I-NEXT: and a5, a0, a3 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srliw a6, a0, 24 ; RV64I-NEXT: sb a0, 24(a2) -; RV64I-NEXT: sb a1, 25(a2) -; RV64I-NEXT: sb s5, 26(a2) -; RV64I-NEXT: sb s8, 27(a2) -; RV64I-NEXT: sb s1, 28(a2) +; RV64I-NEXT: sb a5, 25(a2) +; RV64I-NEXT: sb a4, 26(a2) +; RV64I-NEXT: sb a6, 27(a2) +; RV64I-NEXT: srli a4, a0, 48 +; RV64I-NEXT: and a3, a1, a3 +; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: sb a1, 28(a2) ; 
RV64I-NEXT: sb a3, 29(a2) -; RV64I-NEXT: sb s9, 30(a2) -; RV64I-NEXT: sb s10, 31(a2) -; RV64I-NEXT: ld s0, 88(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 80(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 72(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 64(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 56(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s5, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s6, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s7, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s8, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s9, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s10, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s11, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 96 +; RV64I-NEXT: sb a4, 30(a2) +; RV64I-NEXT: sb a0, 31(a2) +; RV64I-NEXT: ld s0, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB18_29: -; RV64I-NEXT: mv a4, a7 -; RV64I-NEXT: mv a3, t0 -; RV64I-NEXT: bgeu a6, t1, .LBB18_27 +; RV64I-NEXT: mv a6, a3 +; RV64I-NEXT: mv a4, t1 +; RV64I-NEXT: bgeu a7, t0, .LBB18_27 ; RV64I-NEXT: j .LBB18_28 ; ; RV32I-LABEL: ashr_32bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -80 -; RV32I-NEXT: sw ra, 76(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 72(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 68(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 64(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 60(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 52(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 48(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s10, 
32(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s11, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: li a5, 0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: lbu a4, 1(a0) -; RV32I-NEXT: lbu t0, 2(a0) -; RV32I-NEXT: lbu t1, 3(a0) -; RV32I-NEXT: lbu t2, 4(a0) -; RV32I-NEXT: lbu t3, 5(a0) -; RV32I-NEXT: lbu t4, 6(a0) -; RV32I-NEXT: lbu t5, 7(a0) -; RV32I-NEXT: lbu t6, 8(a0) -; RV32I-NEXT: lbu s0, 9(a0) -; RV32I-NEXT: lbu s1, 10(a0) -; RV32I-NEXT: lbu s2, 11(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a3 -; RV32I-NEXT: lbu a7, 13(a0) -; RV32I-NEXT: lbu a6, 14(a0) -; RV32I-NEXT: lbu a3, 15(a0) -; RV32I-NEXT: lbu s3, 28(a0) +; RV32I-NEXT: addi sp, sp, -64 +; RV32I-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 52(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 48(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s11, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 3(a0) +; RV32I-NEXT: lbu a5, 0(a0) +; RV32I-NEXT: lbu a7, 2(a0) +; RV32I-NEXT: lbu t0, 4(a0) +; RV32I-NEXT: lbu t1, 5(a0) +; RV32I-NEXT: lbu t2, 6(a0) +; RV32I-NEXT: lbu t3, 7(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: slli t4, a4, 8 +; RV32I-NEXT: or a4, a3, a5 +; RV32I-NEXT: or t6, t4, a7 ; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: lbu a3, 9(a0) ; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: lbu a5, 8(a0) +; RV32I-NEXT: lbu t4, 10(a0) +; RV32I-NEXT: lbu t5, 11(a0) +; RV32I-NEXT: or a7, t1, t0 +; RV32I-NEXT: or t0, t3, t2 +; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t1, a3, 8 
+; RV32I-NEXT: or a3, t0, a7 +; RV32I-NEXT: or a7, t1, a5 ; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: or t0, t1, t0 -; RV32I-NEXT: or t1, t3, t2 -; RV32I-NEXT: or t4, t5, t4 -; RV32I-NEXT: lbu t2, 29(a0) -; RV32I-NEXT: lbu t3, 30(a0) -; RV32I-NEXT: lbu t5, 31(a0) -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: slli s2, s2, 8 -; RV32I-NEXT: slli s4, t2, 8 -; RV32I-NEXT: or t2, s0, t6 -; RV32I-NEXT: or s0, s2, s1 -; RV32I-NEXT: or s1, s4, s3 -; RV32I-NEXT: lbu t6, 0(a1) -; RV32I-NEXT: lbu s2, 1(a1) -; RV32I-NEXT: lbu s3, 2(a1) +; RV32I-NEXT: lbu t2, 13(a0) +; RV32I-NEXT: lbu t1, 14(a0) +; RV32I-NEXT: lbu a5, 15(a0) +; RV32I-NEXT: lbu t0, 28(a0) +; RV32I-NEXT: lbu t3, 29(a0) +; RV32I-NEXT: lbu s0, 0(a1) +; RV32I-NEXT: lbu s1, 1(a1) +; RV32I-NEXT: lbu s2, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: or s4, t5, t3 -; RV32I-NEXT: slli s2, s2, 8 -; RV32I-NEXT: or s2, s2, t6 +; RV32I-NEXT: or t4, t5, t4 +; RV32I-NEXT: lbu s3, 30(a0) +; RV32I-NEXT: lbu s4, 31(a0) +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t0, t3, t0 +; RV32I-NEXT: slli s1, s1, 8 +; RV32I-NEXT: or s0, s1, s0 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a1, s3 -; RV32I-NEXT: slli t3, a3, 8 -; RV32I-NEXT: slli t6, t0, 16 -; RV32I-NEXT: slli t4, t4, 16 -; RV32I-NEXT: slli t5, s0, 16 -; RV32I-NEXT: slli s4, s4, 16 -; RV32I-NEXT: slli a3, a1, 16 -; RV32I-NEXT: or s5, t4, t1 -; RV32I-NEXT: or a1, s4, s1 -; RV32I-NEXT: or t0, a3, s2 -; RV32I-NEXT: slli t0, t0, 3 -; RV32I-NEXT: srli t1, t0, 5 -; RV32I-NEXT: andi t4, t0, 31 -; RV32I-NEXT: neg a3, t4 +; RV32I-NEXT: or a1, a1, s2 +; RV32I-NEXT: slli t6, t6, 16 +; RV32I-NEXT: slli t3, t4, 16 +; RV32I-NEXT: slli t5, a5, 8 +; RV32I-NEXT: slli s4, s4, 8 +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: or a5, s4, s3 +; RV32I-NEXT: or a1, a1, s0 +; RV32I-NEXT: slli t4, a5, 16 +; RV32I-NEXT: slli a5, a1, 3 +; RV32I-NEXT: or a1, t4, t0 +; RV32I-NEXT: andi t4, a5, 31 +; RV32I-NEXT: srli t0, a5, 5 +; RV32I-NEXT: neg s3, t4 ; RV32I-NEXT: 
beqz t4, .LBB18_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sll a5, s5, a3 +; RV32I-NEXT: sll a6, a3, s3 ; RV32I-NEXT: .LBB18_2: -; RV32I-NEXT: or s10, t6, a4 -; RV32I-NEXT: lbu t6, 12(a0) -; RV32I-NEXT: lbu s0, 19(a0) -; RV32I-NEXT: slli s1, a7, 8 -; RV32I-NEXT: or a6, t3, a6 -; RV32I-NEXT: or a4, t5, t2 -; RV32I-NEXT: srai t2, a1, 31 -; RV32I-NEXT: beqz t1, .LBB18_4 +; RV32I-NEXT: lbu s0, 12(a0) +; RV32I-NEXT: lbu s1, 19(a0) +; RV32I-NEXT: or s5, t6, a4 +; RV32I-NEXT: slli t6, t2, 8 +; RV32I-NEXT: or t5, t5, t1 +; RV32I-NEXT: or a4, t3, a7 +; RV32I-NEXT: srai t1, a1, 31 +; RV32I-NEXT: beqz t0, .LBB18_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: mv a6, t1 ; RV32I-NEXT: j .LBB18_5 ; RV32I-NEXT: .LBB18_4: -; RV32I-NEXT: srl a7, s10, t0 -; RV32I-NEXT: or a5, a7, a5 +; RV32I-NEXT: srl a7, s5, a5 +; RV32I-NEXT: or a6, a7, a6 ; RV32I-NEXT: .LBB18_5: -; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: lbu s3, 17(a0) -; RV32I-NEXT: lbu t3, 18(a0) -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: or s4, s1, t6 -; RV32I-NEXT: slli a6, a6, 16 -; RV32I-NEXT: li s6, 1 -; RV32I-NEXT: sll s2, a4, a3 +; RV32I-NEXT: li t2, 0 +; RV32I-NEXT: lbu t3, 17(a0) +; RV32I-NEXT: lbu a7, 18(a0) +; RV32I-NEXT: slli s4, s1, 8 +; RV32I-NEXT: or s6, t6, s0 +; RV32I-NEXT: slli s7, t5, 16 +; RV32I-NEXT: li s8, 1 +; RV32I-NEXT: sll s2, a4, s3 ; RV32I-NEXT: beqz t4, .LBB18_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: mv a7, s2 +; RV32I-NEXT: mv t2, s2 ; RV32I-NEXT: .LBB18_7: ; RV32I-NEXT: lbu t5, 16(a0) ; RV32I-NEXT: lbu t6, 23(a0) -; RV32I-NEXT: slli s1, s3, 8 -; RV32I-NEXT: or s0, s0, t3 -; RV32I-NEXT: srl s3, s5, t0 -; RV32I-NEXT: or a6, a6, s4 -; RV32I-NEXT: bne t1, s6, .LBB18_9 +; RV32I-NEXT: slli s1, t3, 8 +; RV32I-NEXT: or s0, s4, a7 +; RV32I-NEXT: srl s4, a3, a5 +; RV32I-NEXT: or a7, s7, s6 +; RV32I-NEXT: bne t0, s8, .LBB18_9 ; RV32I-NEXT: # %bb.8: -; RV32I-NEXT: or a5, s3, a7 +; RV32I-NEXT: or a6, s4, t2 ; RV32I-NEXT: .LBB18_9: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: lbu s6, 21(a0) -; RV32I-NEXT: 
lbu a7, 22(a0) -; RV32I-NEXT: slli s4, t6, 8 -; RV32I-NEXT: or s7, s1, t5 -; RV32I-NEXT: slli s8, s0, 16 -; RV32I-NEXT: li s9, 2 -; RV32I-NEXT: sll s0, a6, a3 +; RV32I-NEXT: lbu s7, 21(a0) +; RV32I-NEXT: lbu t2, 22(a0) +; RV32I-NEXT: slli s6, t6, 8 +; RV32I-NEXT: or s8, s1, t5 +; RV32I-NEXT: slli s9, s0, 16 +; RV32I-NEXT: li s10, 2 +; RV32I-NEXT: sll s0, a7, s3 ; RV32I-NEXT: beqz t4, .LBB18_11 ; RV32I-NEXT: # %bb.10: ; RV32I-NEXT: mv t3, s0 ; RV32I-NEXT: .LBB18_11: ; RV32I-NEXT: lbu t5, 20(a0) ; RV32I-NEXT: lbu t6, 27(a0) -; RV32I-NEXT: slli s6, s6, 8 -; RV32I-NEXT: or s4, s4, a7 -; RV32I-NEXT: srl s1, a4, t0 -; RV32I-NEXT: or a7, s8, s7 -; RV32I-NEXT: bne t1, s9, .LBB18_13 +; RV32I-NEXT: slli s7, s7, 8 +; RV32I-NEXT: or s6, s6, t2 +; RV32I-NEXT: srl s1, a4, a5 +; RV32I-NEXT: or t2, s9, s8 +; RV32I-NEXT: bne t0, s10, .LBB18_13 ; RV32I-NEXT: # %bb.12: -; RV32I-NEXT: or a5, s1, t3 +; RV32I-NEXT: or a6, s1, t3 ; RV32I-NEXT: .LBB18_13: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: lbu s8, 25(a0) -; RV32I-NEXT: lbu s7, 26(a0) +; RV32I-NEXT: lbu s9, 25(a0) +; RV32I-NEXT: lbu s8, 26(a0) ; RV32I-NEXT: slli t6, t6, 8 -; RV32I-NEXT: or s6, s6, t5 -; RV32I-NEXT: slli s9, s4, 16 -; RV32I-NEXT: li s11, 3 -; RV32I-NEXT: sll t5, a7, a3 +; RV32I-NEXT: or s7, s7, t5 +; RV32I-NEXT: slli s10, s6, 16 +; RV32I-NEXT: sll t5, t2, s3 ; RV32I-NEXT: beqz t4, .LBB18_15 ; RV32I-NEXT: # %bb.14: ; RV32I-NEXT: mv t3, t5 ; RV32I-NEXT: .LBB18_15: -; RV32I-NEXT: lbu s4, 24(a0) -; RV32I-NEXT: slli s8, s8, 8 -; RV32I-NEXT: or s7, t6, s7 -; RV32I-NEXT: srl t6, a6, t0 -; RV32I-NEXT: or a0, s9, s6 -; RV32I-NEXT: sw s5, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s10, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: bne t1, s11, .LBB18_17 +; RV32I-NEXT: lbu s6, 24(a0) +; RV32I-NEXT: slli s9, s9, 8 +; RV32I-NEXT: or s8, t6, s8 +; RV32I-NEXT: srl t6, a7, a5 +; RV32I-NEXT: or a0, s10, s7 +; RV32I-NEXT: li s7, 3 +; RV32I-NEXT: bne t0, s7, .LBB18_17 ; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: or a5, t6, t3 +; RV32I-NEXT: or 
a6, t6, t3 ; RV32I-NEXT: .LBB18_17: -; RV32I-NEXT: li s6, 0 -; RV32I-NEXT: or t3, s8, s4 -; RV32I-NEXT: slli s7, s7, 16 -; RV32I-NEXT: li s10, 4 -; RV32I-NEXT: sll s11, a0, a3 +; RV32I-NEXT: li s7, 0 +; RV32I-NEXT: or t3, s9, s6 +; RV32I-NEXT: slli s8, s8, 16 +; RV32I-NEXT: li s6, 4 +; RV32I-NEXT: sll s9, a0, s3 +; RV32I-NEXT: sw s9, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: beqz t4, .LBB18_19 ; RV32I-NEXT: # %bb.18: -; RV32I-NEXT: mv s6, s11 +; RV32I-NEXT: lw s7, 0(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB18_19: -; RV32I-NEXT: srl s4, a7, t0 -; RV32I-NEXT: or t3, s7, t3 -; RV32I-NEXT: sw s4, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: bne t1, s10, .LBB18_21 +; RV32I-NEXT: srl ra, t2, a5 +; RV32I-NEXT: or t3, s8, t3 +; RV32I-NEXT: bne t0, s6, .LBB18_21 ; RV32I-NEXT: # %bb.20: -; RV32I-NEXT: or a5, s4, s6 +; RV32I-NEXT: or a6, ra, s7 ; RV32I-NEXT: .LBB18_21: -; RV32I-NEXT: li s4, 0 -; RV32I-NEXT: li s5, 5 -; RV32I-NEXT: sll s6, t3, a3 -; RV32I-NEXT: sw s6, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: li s6, 0 +; RV32I-NEXT: li s8, 5 +; RV32I-NEXT: sll s7, t3, s3 +; RV32I-NEXT: sw s7, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: beqz t4, .LBB18_23 ; RV32I-NEXT: # %bb.22: -; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB18_23: -; RV32I-NEXT: srl s6, a0, t0 -; RV32I-NEXT: beq t1, s5, .LBB18_25 +; RV32I-NEXT: srl s7, a0, a5 +; RV32I-NEXT: beq t0, s8, .LBB18_25 ; RV32I-NEXT: # %bb.24: -; RV32I-NEXT: mv ra, s6 +; RV32I-NEXT: mv s11, s7 ; RV32I-NEXT: j .LBB18_26 ; RV32I-NEXT: .LBB18_25: -; RV32I-NEXT: mv ra, s6 -; RV32I-NEXT: or a5, s6, s4 +; RV32I-NEXT: mv s11, s7 +; RV32I-NEXT: or a6, s7, s6 ; RV32I-NEXT: .LBB18_26: -; RV32I-NEXT: li s4, 0 -; RV32I-NEXT: li s8, 6 -; RV32I-NEXT: sll s7, a1, a3 +; RV32I-NEXT: li s6, 0 +; RV32I-NEXT: li s9, 6 +; RV32I-NEXT: sll s10, a1, s3 +; RV32I-NEXT: sw s10, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: beqz t4, .LBB18_28 ; RV32I-NEXT: # %bb.27: -; RV32I-NEXT: mv s4, s7 
+; RV32I-NEXT: lw s6, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB18_28: -; RV32I-NEXT: srl s5, t3, t0 -; RV32I-NEXT: beq t1, s8, .LBB18_30 +; RV32I-NEXT: mv s10, a3 +; RV32I-NEXT: srl s7, t3, a5 +; RV32I-NEXT: beq t0, s9, .LBB18_30 ; RV32I-NEXT: # %bb.29: -; RV32I-NEXT: mv s9, s5 +; RV32I-NEXT: mv s9, s7 +; RV32I-NEXT: mv a3, s5 ; RV32I-NEXT: j .LBB18_31 ; RV32I-NEXT: .LBB18_30: -; RV32I-NEXT: mv s9, s5 -; RV32I-NEXT: or a5, s5, s4 +; RV32I-NEXT: mv a3, s5 +; RV32I-NEXT: mv s9, s7 +; RV32I-NEXT: or a6, s7, s6 ; RV32I-NEXT: .LBB18_31: ; RV32I-NEXT: li s5, 0 -; RV32I-NEXT: li s6, 7 -; RV32I-NEXT: sll s4, t2, a3 +; RV32I-NEXT: sll s3, t1, s3 ; RV32I-NEXT: beqz t4, .LBB18_33 ; RV32I-NEXT: # %bb.32: -; RV32I-NEXT: mv s5, s4 +; RV32I-NEXT: mv s5, s3 ; RV32I-NEXT: .LBB18_33: -; RV32I-NEXT: srl a3, a1, t0 -; RV32I-NEXT: bne t1, s6, .LBB18_35 +; RV32I-NEXT: srl s6, a1, a5 +; RV32I-NEXT: li s7, 7 +; RV32I-NEXT: bne t0, s7, .LBB18_35 ; RV32I-NEXT: # %bb.34: -; RV32I-NEXT: or a5, a3, s5 +; RV32I-NEXT: or a6, s6, s5 ; RV32I-NEXT: .LBB18_35: -; RV32I-NEXT: li s5, 3 -; RV32I-NEXT: mv s6, a3 -; RV32I-NEXT: bnez t0, .LBB18_39 +; RV32I-NEXT: mv s5, a3 +; RV32I-NEXT: mv a3, s10 +; RV32I-NEXT: beqz a5, .LBB18_37 ; RV32I-NEXT: # %bb.36: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB18_40 +; RV32I-NEXT: mv s5, a6 ; RV32I-NEXT: .LBB18_37: -; RV32I-NEXT: beqz t1, .LBB18_41 -; RV32I-NEXT: .LBB18_38: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: j .LBB18_42 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: li s7, 1 +; RV32I-NEXT: beqz t4, .LBB18_39 +; RV32I-NEXT: # %bb.38: +; RV32I-NEXT: mv a6, s2 ; RV32I-NEXT: .LBB18_39: -; RV32I-NEXT: sw a5, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB18_37 -; RV32I-NEXT: .LBB18_40: -; RV32I-NEXT: mv a3, s2 -; RV32I-NEXT: bnez t1, .LBB18_38 +; RV32I-NEXT: beqz t0, .LBB18_41 +; RV32I-NEXT: # %bb.40: +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: j .LBB18_42 ; RV32I-NEXT: .LBB18_41: -; RV32I-NEXT: or a5, s3, a3 +; RV32I-NEXT: or a6, 
s4, a6 ; RV32I-NEXT: .LBB18_42: -; RV32I-NEXT: li s2, 1 -; RV32I-NEXT: li s3, 2 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB18_61 +; RV32I-NEXT: li s4, 2 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: bnez t4, .LBB18_66 ; RV32I-NEXT: # %bb.43: -; RV32I-NEXT: beq t1, s2, .LBB18_62 +; RV32I-NEXT: beq t0, s7, .LBB18_67 ; RV32I-NEXT: .LBB18_44: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB18_63 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: bnez t4, .LBB18_68 ; RV32I-NEXT: .LBB18_45: -; RV32I-NEXT: beq t1, s3, .LBB18_64 +; RV32I-NEXT: beq t0, s4, .LBB18_69 ; RV32I-NEXT: .LBB18_46: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB18_65 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: bnez t4, .LBB18_70 ; RV32I-NEXT: .LBB18_47: -; RV32I-NEXT: beq t1, s5, .LBB18_66 +; RV32I-NEXT: li s10, 3 +; RV32I-NEXT: bne t0, s10, .LBB18_49 ; RV32I-NEXT: .LBB18_48: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB18_67 +; RV32I-NEXT: or a6, ra, s2 ; RV32I-NEXT: .LBB18_49: -; RV32I-NEXT: bne t1, s10, .LBB18_51 -; RV32I-NEXT: .LBB18_50: -; RV32I-NEXT: or a5, ra, a3 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: li s10, 4 +; RV32I-NEXT: bnez t4, .LBB18_71 +; RV32I-NEXT: # %bb.50: +; RV32I-NEXT: beq t0, s10, .LBB18_72 ; RV32I-NEXT: .LBB18_51: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: li s10, 5 -; RV32I-NEXT: bnez t4, .LBB18_68 -; RV32I-NEXT: # %bb.52: -; RV32I-NEXT: beq t1, s10, .LBB18_69 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: bnez t4, .LBB18_73 +; RV32I-NEXT: .LBB18_52: +; RV32I-NEXT: bne t0, s8, .LBB18_54 ; RV32I-NEXT: .LBB18_53: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB18_70 +; RV32I-NEXT: or a6, s9, s2 ; RV32I-NEXT: .LBB18_54: -; RV32I-NEXT: bne t1, s8, .LBB18_56 -; RV32I-NEXT: .LBB18_55: -; RV32I-NEXT: or a5, s6, a3 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: li s8, 6 +; RV32I-NEXT: beqz t4, .LBB18_56 +; RV32I-NEXT: # %bb.55: +; RV32I-NEXT: mv s2, s3 ; RV32I-NEXT: .LBB18_56: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: li s8, 7 -; RV32I-NEXT: bne t1, s8, .LBB18_71 +; RV32I-NEXT: bne t0, s8, 
.LBB18_58 ; RV32I-NEXT: # %bb.57: -; RV32I-NEXT: bnez t0, .LBB18_72 +; RV32I-NEXT: or a6, s6, s2 ; RV32I-NEXT: .LBB18_58: -; RV32I-NEXT: li a5, 0 -; RV32I-NEXT: bnez t4, .LBB18_73 -; RV32I-NEXT: .LBB18_59: -; RV32I-NEXT: beqz t1, .LBB18_74 +; RV32I-NEXT: mv s2, t1 +; RV32I-NEXT: li s10, 7 +; RV32I-NEXT: beq t0, s10, .LBB18_60 +; RV32I-NEXT: # %bb.59: +; RV32I-NEXT: mv s2, a6 ; RV32I-NEXT: .LBB18_60: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: j .LBB18_75 -; RV32I-NEXT: .LBB18_61: -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: bne t1, s2, .LBB18_44 +; RV32I-NEXT: beqz a5, .LBB18_62 +; RV32I-NEXT: # %bb.61: +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: .LBB18_62: -; RV32I-NEXT: or a5, s1, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB18_45 -; RV32I-NEXT: .LBB18_63: -; RV32I-NEXT: mv a3, t5 -; RV32I-NEXT: bne t1, s3, .LBB18_46 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: li s10, 4 +; RV32I-NEXT: beqz t4, .LBB18_64 +; RV32I-NEXT: # %bb.63: +; RV32I-NEXT: mv a6, s0 ; RV32I-NEXT: .LBB18_64: -; RV32I-NEXT: or a5, t6, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB18_47 -; RV32I-NEXT: .LBB18_65: -; RV32I-NEXT: mv a3, s11 -; RV32I-NEXT: bne t1, s5, .LBB18_48 +; RV32I-NEXT: li s2, 5 +; RV32I-NEXT: beqz t0, .LBB18_74 +; RV32I-NEXT: # %bb.65: +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: j .LBB18_75 ; RV32I-NEXT: .LBB18_66: -; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a5, a5, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB18_49 +; RV32I-NEXT: mv s2, s0 +; RV32I-NEXT: bne t0, s7, .LBB18_44 ; RV32I-NEXT: .LBB18_67: -; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: beq t1, s10, .LBB18_50 -; RV32I-NEXT: j .LBB18_51 +; RV32I-NEXT: or a6, s1, s2 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: beqz t4, .LBB18_45 ; RV32I-NEXT: .LBB18_68: -; RV32I-NEXT: mv a3, s7 -; RV32I-NEXT: bne t1, s10, .LBB18_53 +; RV32I-NEXT: mv s2, t5 +; RV32I-NEXT: bne t0, s4, .LBB18_46 ; RV32I-NEXT: .LBB18_69: -; RV32I-NEXT: or a5, s9, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz 
t4, .LBB18_54 +; RV32I-NEXT: or a6, t6, s2 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: beqz t4, .LBB18_47 ; RV32I-NEXT: .LBB18_70: -; RV32I-NEXT: mv a3, s4 -; RV32I-NEXT: beq t1, s8, .LBB18_55 -; RV32I-NEXT: j .LBB18_56 +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: li s10, 3 +; RV32I-NEXT: beq t0, s10, .LBB18_48 +; RV32I-NEXT: j .LBB18_49 ; RV32I-NEXT: .LBB18_71: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beqz t0, .LBB18_58 +; RV32I-NEXT: lw s2, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t0, s10, .LBB18_51 ; RV32I-NEXT: .LBB18_72: -; RV32I-NEXT: sw a3, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: li a5, 0 -; RV32I-NEXT: beqz t4, .LBB18_59 +; RV32I-NEXT: or a6, s11, s2 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: beqz t4, .LBB18_52 ; RV32I-NEXT: .LBB18_73: -; RV32I-NEXT: mv a5, s0 -; RV32I-NEXT: bnez t1, .LBB18_60 +; RV32I-NEXT: lw s2, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: beq t0, s8, .LBB18_53 +; RV32I-NEXT: j .LBB18_54 ; RV32I-NEXT: .LBB18_74: -; RV32I-NEXT: or a5, s1, a5 +; RV32I-NEXT: or a6, s1, a6 ; RV32I-NEXT: .LBB18_75: -; RV32I-NEXT: li s0, 4 -; RV32I-NEXT: li s1, 6 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB18_91 +; RV32I-NEXT: li s1, 3 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t4, .LBB18_95 ; RV32I-NEXT: # %bb.76: -; RV32I-NEXT: beq t1, s2, .LBB18_92 +; RV32I-NEXT: beq t0, s7, .LBB18_96 ; RV32I-NEXT: .LBB18_77: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB18_93 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t4, .LBB18_97 ; RV32I-NEXT: .LBB18_78: -; RV32I-NEXT: beq t1, s3, .LBB18_94 +; RV32I-NEXT: beq t0, s4, .LBB18_98 ; RV32I-NEXT: .LBB18_79: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB18_95 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t4, .LBB18_99 ; RV32I-NEXT: .LBB18_80: -; RV32I-NEXT: beq t1, s5, .LBB18_96 +; RV32I-NEXT: beq t0, s1, .LBB18_100 ; RV32I-NEXT: .LBB18_81: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB18_97 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t4, .LBB18_101 ; RV32I-NEXT: .LBB18_82: -; 
RV32I-NEXT: beq t1, s0, .LBB18_98 +; RV32I-NEXT: beq t0, s10, .LBB18_102 ; RV32I-NEXT: .LBB18_83: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB18_99 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t4, .LBB18_103 ; RV32I-NEXT: .LBB18_84: -; RV32I-NEXT: beq t1, s10, .LBB18_100 +; RV32I-NEXT: beq t0, s2, .LBB18_104 ; RV32I-NEXT: .LBB18_85: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s1, .LBB18_101 +; RV32I-NEXT: mv s0, t1 +; RV32I-NEXT: beq t0, s8, .LBB18_87 ; RV32I-NEXT: .LBB18_86: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s8, .LBB18_102 +; RV32I-NEXT: mv s0, a6 ; RV32I-NEXT: .LBB18_87: -; RV32I-NEXT: bnez t0, .LBB18_103 -; RV32I-NEXT: .LBB18_88: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB18_104 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: li s7, 7 +; RV32I-NEXT: beq t0, s7, .LBB18_89 +; RV32I-NEXT: # %bb.88: +; RV32I-NEXT: mv a6, s0 ; RV32I-NEXT: .LBB18_89: -; RV32I-NEXT: beqz t1, .LBB18_105 -; RV32I-NEXT: .LBB18_90: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB18_106 -; RV32I-NEXT: j .LBB18_107 +; RV32I-NEXT: beqz a5, .LBB18_91 +; RV32I-NEXT: # %bb.90: +; RV32I-NEXT: mv a4, a6 ; RV32I-NEXT: .LBB18_91: -; RV32I-NEXT: mv a3, t5 -; RV32I-NEXT: bne t1, s2, .LBB18_77 -; RV32I-NEXT: .LBB18_92: -; RV32I-NEXT: or a5, t6, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB18_78 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: li s0, 1 +; RV32I-NEXT: beqz t4, .LBB18_93 +; RV32I-NEXT: # %bb.92: +; RV32I-NEXT: mv a6, t5 ; RV32I-NEXT: .LBB18_93: -; RV32I-NEXT: mv a3, s11 -; RV32I-NEXT: bne t1, s3, .LBB18_79 -; RV32I-NEXT: .LBB18_94: -; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a5, a5, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB18_80 +; RV32I-NEXT: beqz t0, .LBB18_105 +; RV32I-NEXT: # %bb.94: +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: j .LBB18_106 ; RV32I-NEXT: .LBB18_95: -; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne t1, s5, .LBB18_81 +; RV32I-NEXT: mv s0, t5 +; RV32I-NEXT: bne 
t0, s7, .LBB18_77 ; RV32I-NEXT: .LBB18_96: -; RV32I-NEXT: or a5, ra, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB18_82 +; RV32I-NEXT: or a6, t6, s0 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t4, .LBB18_78 ; RV32I-NEXT: .LBB18_97: -; RV32I-NEXT: mv a3, s7 -; RV32I-NEXT: bne t1, s0, .LBB18_83 +; RV32I-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t0, s4, .LBB18_79 ; RV32I-NEXT: .LBB18_98: -; RV32I-NEXT: or a5, s9, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB18_84 +; RV32I-NEXT: or a6, ra, s0 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t4, .LBB18_80 ; RV32I-NEXT: .LBB18_99: -; RV32I-NEXT: mv a3, s4 -; RV32I-NEXT: bne t1, s10, .LBB18_85 +; RV32I-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t0, s1, .LBB18_81 ; RV32I-NEXT: .LBB18_100: -; RV32I-NEXT: or a5, s6, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s1, .LBB18_86 +; RV32I-NEXT: or a6, s11, s0 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t4, .LBB18_82 ; RV32I-NEXT: .LBB18_101: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s8, .LBB18_87 +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t0, s10, .LBB18_83 ; RV32I-NEXT: .LBB18_102: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: beqz t0, .LBB18_88 +; RV32I-NEXT: or a6, s9, s0 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t4, .LBB18_84 ; RV32I-NEXT: .LBB18_103: -; RV32I-NEXT: mv a4, a5 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB18_89 +; RV32I-NEXT: mv s0, s3 +; RV32I-NEXT: bne t0, s2, .LBB18_85 ; RV32I-NEXT: .LBB18_104: -; RV32I-NEXT: mv a3, t5 -; RV32I-NEXT: bnez t1, .LBB18_90 +; RV32I-NEXT: or a6, s6, s0 +; RV32I-NEXT: mv s0, t1 +; RV32I-NEXT: bne t0, s8, .LBB18_86 +; RV32I-NEXT: j .LBB18_87 ; RV32I-NEXT: .LBB18_105: -; RV32I-NEXT: or a5, t6, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB18_107 +; RV32I-NEXT: or a6, t6, a6 ; RV32I-NEXT: .LBB18_106: -; RV32I-NEXT: mv a3, s11 -; RV32I-NEXT: .LBB18_107: -; RV32I-NEXT: beq t1, s2, .LBB18_121 -; RV32I-NEXT: # 
%bb.108: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB18_122 +; RV32I-NEXT: li t6, 7 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: bnez t4, .LBB18_121 +; RV32I-NEXT: # %bb.107: +; RV32I-NEXT: beq t0, s0, .LBB18_122 +; RV32I-NEXT: .LBB18_108: +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: bnez t4, .LBB18_123 ; RV32I-NEXT: .LBB18_109: -; RV32I-NEXT: beq t1, s3, .LBB18_123 +; RV32I-NEXT: beq t0, s4, .LBB18_124 ; RV32I-NEXT: .LBB18_110: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB18_124 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: bnez t4, .LBB18_125 ; RV32I-NEXT: .LBB18_111: -; RV32I-NEXT: beq t1, s5, .LBB18_125 +; RV32I-NEXT: beq t0, s1, .LBB18_126 ; RV32I-NEXT: .LBB18_112: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB18_126 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: bnez t4, .LBB18_127 ; RV32I-NEXT: .LBB18_113: -; RV32I-NEXT: beq t1, s0, .LBB18_127 +; RV32I-NEXT: beq t0, s10, .LBB18_128 ; RV32I-NEXT: .LBB18_114: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s10, .LBB18_128 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, s2, .LBB18_129 ; RV32I-NEXT: .LBB18_115: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s1, .LBB18_129 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s8, .LBB18_130 ; RV32I-NEXT: .LBB18_116: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s8, .LBB18_130 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, t6, .LBB18_131 ; RV32I-NEXT: .LBB18_117: -; RV32I-NEXT: bnez t0, .LBB18_131 +; RV32I-NEXT: bnez a5, .LBB18_132 ; RV32I-NEXT: .LBB18_118: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB18_132 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: bnez t4, .LBB18_133 ; RV32I-NEXT: .LBB18_119: -; RV32I-NEXT: beqz t1, .LBB18_133 +; RV32I-NEXT: beqz t0, .LBB18_134 ; RV32I-NEXT: .LBB18_120: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: j .LBB18_134 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: bnez t4, .LBB18_135 +; RV32I-NEXT: j .LBB18_136 ; RV32I-NEXT: .LBB18_121: -; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a5, a5, a3 -; 
RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB18_109 +; RV32I-NEXT: lw t5, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t0, s0, .LBB18_108 ; RV32I-NEXT: .LBB18_122: -; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne t1, s3, .LBB18_110 +; RV32I-NEXT: or a6, ra, t5 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: beqz t4, .LBB18_109 ; RV32I-NEXT: .LBB18_123: -; RV32I-NEXT: or a5, ra, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB18_111 +; RV32I-NEXT: lw t5, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t0, s4, .LBB18_110 ; RV32I-NEXT: .LBB18_124: -; RV32I-NEXT: mv a3, s7 -; RV32I-NEXT: bne t1, s5, .LBB18_112 +; RV32I-NEXT: or a6, s11, t5 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: beqz t4, .LBB18_111 ; RV32I-NEXT: .LBB18_125: -; RV32I-NEXT: or a5, s9, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB18_113 +; RV32I-NEXT: lw t5, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t0, s1, .LBB18_112 ; RV32I-NEXT: .LBB18_126: -; RV32I-NEXT: mv a3, s4 -; RV32I-NEXT: bne t1, s0, .LBB18_114 +; RV32I-NEXT: or a6, s9, t5 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: beqz t4, .LBB18_113 ; RV32I-NEXT: .LBB18_127: -; RV32I-NEXT: or a5, s6, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s10, .LBB18_115 +; RV32I-NEXT: mv t5, s3 +; RV32I-NEXT: bne t0, s10, .LBB18_114 ; RV32I-NEXT: .LBB18_128: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s1, .LBB18_116 +; RV32I-NEXT: or a6, s6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, s2, .LBB18_115 ; RV32I-NEXT: .LBB18_129: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s8, .LBB18_117 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, s8, .LBB18_116 ; RV32I-NEXT: .LBB18_130: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beqz t0, .LBB18_118 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, t6, .LBB18_117 ; RV32I-NEXT: .LBB18_131: -; RV32I-NEXT: mv a6, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB18_119 +; RV32I-NEXT: 
mv t5, a6 +; RV32I-NEXT: beqz a5, .LBB18_118 ; RV32I-NEXT: .LBB18_132: -; RV32I-NEXT: mv a3, s11 -; RV32I-NEXT: bnez t1, .LBB18_120 +; RV32I-NEXT: mv a7, t5 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: beqz t4, .LBB18_119 ; RV32I-NEXT: .LBB18_133: -; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: lw a6, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: bnez t0, .LBB18_120 ; RV32I-NEXT: .LBB18_134: -; RV32I-NEXT: lw s11, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB18_148 -; RV32I-NEXT: # %bb.135: -; RV32I-NEXT: beq t1, s2, .LBB18_149 +; RV32I-NEXT: or a6, ra, a6 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: beqz t4, .LBB18_136 +; RV32I-NEXT: .LBB18_135: +; RV32I-NEXT: lw t5, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB18_136: -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beq t0, s0, .LBB18_149 +; RV32I-NEXT: # %bb.137: +; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: bnez t4, .LBB18_150 -; RV32I-NEXT: .LBB18_137: -; RV32I-NEXT: beq t1, s3, .LBB18_151 ; RV32I-NEXT: .LBB18_138: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB18_152 +; RV32I-NEXT: beq t0, s4, .LBB18_151 ; RV32I-NEXT: .LBB18_139: -; RV32I-NEXT: beq t1, s5, .LBB18_153 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: bnez t4, .LBB18_152 ; RV32I-NEXT: .LBB18_140: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s0, .LBB18_154 +; RV32I-NEXT: beq t0, s1, .LBB18_153 ; RV32I-NEXT: .LBB18_141: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s10, .LBB18_155 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, s10, .LBB18_154 ; RV32I-NEXT: .LBB18_142: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s1, .LBB18_156 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s2, .LBB18_155 ; RV32I-NEXT: .LBB18_143: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s8, .LBB18_157 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, s8, .LBB18_156 ; RV32I-NEXT: .LBB18_144: -; RV32I-NEXT: bnez t0, .LBB18_158 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, t6, .LBB18_157 ; RV32I-NEXT: .LBB18_145: -; 
RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB18_159 +; RV32I-NEXT: bnez a5, .LBB18_158 ; RV32I-NEXT: .LBB18_146: -; RV32I-NEXT: beqz t1, .LBB18_160 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: bnez t4, .LBB18_159 ; RV32I-NEXT: .LBB18_147: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t0, .LBB18_160 +; RV32I-NEXT: .LBB18_148: +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: bnez t4, .LBB18_161 ; RV32I-NEXT: j .LBB18_162 -; RV32I-NEXT: .LBB18_148: -; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne t1, s2, .LBB18_136 ; RV32I-NEXT: .LBB18_149: -; RV32I-NEXT: or a5, ra, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB18_137 +; RV32I-NEXT: or a6, s11, t5 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: beqz t4, .LBB18_138 ; RV32I-NEXT: .LBB18_150: -; RV32I-NEXT: mv a3, s7 -; RV32I-NEXT: bne t1, s3, .LBB18_138 +; RV32I-NEXT: lw t5, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t0, s4, .LBB18_139 ; RV32I-NEXT: .LBB18_151: -; RV32I-NEXT: or a5, s9, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB18_139 +; RV32I-NEXT: or a6, s9, t5 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: beqz t4, .LBB18_140 ; RV32I-NEXT: .LBB18_152: -; RV32I-NEXT: mv a3, s4 -; RV32I-NEXT: bne t1, s5, .LBB18_140 +; RV32I-NEXT: mv t5, s3 +; RV32I-NEXT: bne t0, s1, .LBB18_141 ; RV32I-NEXT: .LBB18_153: -; RV32I-NEXT: or a5, s6, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s0, .LBB18_141 +; RV32I-NEXT: or a6, s6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, s10, .LBB18_142 ; RV32I-NEXT: .LBB18_154: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s10, .LBB18_142 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, s2, .LBB18_143 ; RV32I-NEXT: .LBB18_155: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s1, .LBB18_143 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, s8, .LBB18_144 ; RV32I-NEXT: .LBB18_156: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, 
t2 -; RV32I-NEXT: beq t1, s8, .LBB18_144 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, t6, .LBB18_145 ; RV32I-NEXT: .LBB18_157: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: beqz t0, .LBB18_145 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: beqz a5, .LBB18_146 ; RV32I-NEXT: .LBB18_158: -; RV32I-NEXT: mv a7, a5 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB18_146 +; RV32I-NEXT: mv t2, a6 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: beqz t4, .LBB18_147 ; RV32I-NEXT: .LBB18_159: -; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: bnez t1, .LBB18_147 +; RV32I-NEXT: lw a6, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: bnez t0, .LBB18_148 ; RV32I-NEXT: .LBB18_160: -; RV32I-NEXT: or a5, ra, a3 -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: or a6, s11, a6 +; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: beqz t4, .LBB18_162 ; RV32I-NEXT: .LBB18_161: -; RV32I-NEXT: mv a3, s7 +; RV32I-NEXT: lw t5, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB18_162: -; RV32I-NEXT: beq t1, s2, .LBB18_174 +; RV32I-NEXT: beq t0, s0, .LBB18_174 ; RV32I-NEXT: # %bb.163: -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: bnez t4, .LBB18_175 ; RV32I-NEXT: .LBB18_164: -; RV32I-NEXT: beq t1, s3, .LBB18_176 +; RV32I-NEXT: beq t0, s4, .LBB18_176 ; RV32I-NEXT: .LBB18_165: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s5, .LBB18_177 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, s1, .LBB18_177 ; RV32I-NEXT: .LBB18_166: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s0, .LBB18_178 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s10, .LBB18_178 ; RV32I-NEXT: .LBB18_167: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s10, .LBB18_179 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, s2, .LBB18_179 ; RV32I-NEXT: .LBB18_168: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s1, .LBB18_180 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s8, .LBB18_180 ; RV32I-NEXT: .LBB18_169: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s8, .LBB18_181 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, t6, 
.LBB18_181 ; RV32I-NEXT: .LBB18_170: -; RV32I-NEXT: bnez t0, .LBB18_182 +; RV32I-NEXT: bnez a5, .LBB18_182 ; RV32I-NEXT: .LBB18_171: -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li a6, 0 ; RV32I-NEXT: bnez t4, .LBB18_183 ; RV32I-NEXT: .LBB18_172: -; RV32I-NEXT: beqz t1, .LBB18_184 +; RV32I-NEXT: beqz t0, .LBB18_184 ; RV32I-NEXT: .LBB18_173: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: bnez t4, .LBB18_185 ; RV32I-NEXT: j .LBB18_186 ; RV32I-NEXT: .LBB18_174: -; RV32I-NEXT: or a5, s9, a3 -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: or a6, s9, t5 +; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: beqz t4, .LBB18_164 ; RV32I-NEXT: .LBB18_175: -; RV32I-NEXT: mv a3, s4 -; RV32I-NEXT: bne t1, s3, .LBB18_165 +; RV32I-NEXT: mv t5, s3 +; RV32I-NEXT: bne t0, s4, .LBB18_165 ; RV32I-NEXT: .LBB18_176: -; RV32I-NEXT: or a5, s6, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s5, .LBB18_166 +; RV32I-NEXT: or a6, s6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, s1, .LBB18_166 ; RV32I-NEXT: .LBB18_177: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s0, .LBB18_167 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, s10, .LBB18_167 ; RV32I-NEXT: .LBB18_178: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s10, .LBB18_168 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, s2, .LBB18_168 ; RV32I-NEXT: .LBB18_179: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s1, .LBB18_169 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, s8, .LBB18_169 ; RV32I-NEXT: .LBB18_180: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s8, .LBB18_170 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, t6, .LBB18_170 ; RV32I-NEXT: .LBB18_181: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beqz t0, .LBB18_171 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: beqz a5, .LBB18_171 ; RV32I-NEXT: 
.LBB18_182: -; RV32I-NEXT: mv a0, a3 -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: mv a0, t5 +; RV32I-NEXT: li a6, 0 ; RV32I-NEXT: beqz t4, .LBB18_172 ; RV32I-NEXT: .LBB18_183: -; RV32I-NEXT: mv a3, s7 -; RV32I-NEXT: bnez t1, .LBB18_173 +; RV32I-NEXT: lw a6, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bnez t0, .LBB18_173 ; RV32I-NEXT: .LBB18_184: -; RV32I-NEXT: or a5, s9, a3 -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: or a6, s9, a6 +; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: beqz t4, .LBB18_186 ; RV32I-NEXT: .LBB18_185: -; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: mv t5, s3 ; RV32I-NEXT: .LBB18_186: -; RV32I-NEXT: beq t1, s2, .LBB18_197 +; RV32I-NEXT: beq t0, s0, .LBB18_197 ; RV32I-NEXT: # %bb.187: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s3, .LBB18_198 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, s4, .LBB18_198 ; RV32I-NEXT: .LBB18_188: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s5, .LBB18_199 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s1, .LBB18_199 ; RV32I-NEXT: .LBB18_189: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s0, .LBB18_200 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, s10, .LBB18_200 ; RV32I-NEXT: .LBB18_190: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s10, .LBB18_201 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s2, .LBB18_201 ; RV32I-NEXT: .LBB18_191: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s1, .LBB18_202 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, s8, .LBB18_202 ; RV32I-NEXT: .LBB18_192: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s8, .LBB18_203 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, t6, .LBB18_203 ; RV32I-NEXT: .LBB18_193: -; RV32I-NEXT: bnez t0, .LBB18_204 +; RV32I-NEXT: bnez a5, .LBB18_204 ; RV32I-NEXT: .LBB18_194: -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li a6, 0 ; RV32I-NEXT: bnez t4, .LBB18_205 ; RV32I-NEXT: .LBB18_195: -; RV32I-NEXT: beqz t1, .LBB18_206 +; RV32I-NEXT: beqz t0, .LBB18_206 ; RV32I-NEXT: .LBB18_196: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s2, .LBB18_207 +; 
RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: mv t4, t1 +; RV32I-NEXT: bne t0, s0, .LBB18_207 ; RV32I-NEXT: j .LBB18_208 ; RV32I-NEXT: .LBB18_197: -; RV32I-NEXT: or a5, s6, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s3, .LBB18_188 +; RV32I-NEXT: or a6, s6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, s4, .LBB18_188 ; RV32I-NEXT: .LBB18_198: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s5, .LBB18_189 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, s1, .LBB18_189 ; RV32I-NEXT: .LBB18_199: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s0, .LBB18_190 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, s10, .LBB18_190 ; RV32I-NEXT: .LBB18_200: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s10, .LBB18_191 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, s2, .LBB18_191 ; RV32I-NEXT: .LBB18_201: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s1, .LBB18_192 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, s8, .LBB18_192 ; RV32I-NEXT: .LBB18_202: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s8, .LBB18_193 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, t6, .LBB18_193 ; RV32I-NEXT: .LBB18_203: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: beqz t0, .LBB18_194 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: beqz a5, .LBB18_194 ; RV32I-NEXT: .LBB18_204: -; RV32I-NEXT: mv t3, a5 -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: mv t3, a6 +; RV32I-NEXT: li a6, 0 ; RV32I-NEXT: beqz t4, .LBB18_195 ; RV32I-NEXT: .LBB18_205: -; RV32I-NEXT: mv a3, s4 -; RV32I-NEXT: bnez t1, .LBB18_196 +; RV32I-NEXT: mv a6, s3 +; RV32I-NEXT: bnez t0, .LBB18_196 ; RV32I-NEXT: .LBB18_206: -; RV32I-NEXT: or a3, s6, a3 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s2, .LBB18_208 +; RV32I-NEXT: or a6, s6, a6 +; RV32I-NEXT: mv t4, t1 +; RV32I-NEXT: beq t0, s0, .LBB18_208 ; 
RV32I-NEXT: .LBB18_207: -; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv t4, a6 ; RV32I-NEXT: .LBB18_208: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s3, .LBB18_217 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s4, .LBB18_217 ; RV32I-NEXT: # %bb.209: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s5, .LBB18_218 +; RV32I-NEXT: mv t4, t1 +; RV32I-NEXT: bne t0, s1, .LBB18_218 ; RV32I-NEXT: .LBB18_210: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s0, .LBB18_219 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s10, .LBB18_219 ; RV32I-NEXT: .LBB18_211: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s10, .LBB18_220 +; RV32I-NEXT: mv t4, t1 +; RV32I-NEXT: bne t0, s2, .LBB18_220 ; RV32I-NEXT: .LBB18_212: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s1, .LBB18_221 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s8, .LBB18_221 ; RV32I-NEXT: .LBB18_213: -; RV32I-NEXT: bne t1, s8, .LBB18_222 +; RV32I-NEXT: bne t0, t6, .LBB18_222 ; RV32I-NEXT: .LBB18_214: -; RV32I-NEXT: beqz t0, .LBB18_216 +; RV32I-NEXT: beqz a5, .LBB18_216 ; RV32I-NEXT: .LBB18_215: -; RV32I-NEXT: mv a1, t2 +; RV32I-NEXT: mv a1, t1 ; RV32I-NEXT: .LBB18_216: -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: srli a5, ra, 16 -; RV32I-NEXT: lui t4, 16 -; RV32I-NEXT: srli t2, ra, 24 -; RV32I-NEXT: srli t0, s11, 16 -; RV32I-NEXT: srli t6, s11, 24 -; RV32I-NEXT: srli t1, a4, 16 -; RV32I-NEXT: srli s2, a4, 24 -; RV32I-NEXT: srli t5, a6, 16 -; RV32I-NEXT: srli s3, a6, 24 -; RV32I-NEXT: srli s1, a7, 16 -; RV32I-NEXT: srli a3, a7, 24 -; RV32I-NEXT: srli s0, a0, 16 -; RV32I-NEXT: srli s5, a0, 24 -; RV32I-NEXT: srli s4, t3, 16 -; RV32I-NEXT: srli s6, t3, 24 -; RV32I-NEXT: srli s7, a1, 16 -; RV32I-NEXT: srli s8, a1, 24 -; RV32I-NEXT: addi t4, t4, -1 -; RV32I-NEXT: and s9, ra, t4 -; RV32I-NEXT: and s10, s11, t4 -; RV32I-NEXT: srli s9, s9, 8 -; RV32I-NEXT: sb ra, 0(a2) -; RV32I-NEXT: sb s9, 1(a2) -; RV32I-NEXT: sb a5, 2(a2) -; RV32I-NEXT: sb t2, 3(a2) -; RV32I-NEXT: and a5, a4, t4 -; RV32I-NEXT: srli t2, 
s10, 8 -; RV32I-NEXT: sb s11, 4(a2) -; RV32I-NEXT: sb t2, 5(a2) -; RV32I-NEXT: sb t0, 6(a2) -; RV32I-NEXT: sb t6, 7(a2) -; RV32I-NEXT: and t0, a6, t4 -; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: lui a5, 16 +; RV32I-NEXT: addi a5, a5, -1 +; RV32I-NEXT: srli a6, s5, 16 +; RV32I-NEXT: and t0, s5, a5 +; RV32I-NEXT: srli t1, s5, 24 +; RV32I-NEXT: srli t0, t0, 8 +; RV32I-NEXT: sb s5, 0(a2) +; RV32I-NEXT: sb t0, 1(a2) +; RV32I-NEXT: sb a6, 2(a2) +; RV32I-NEXT: sb t1, 3(a2) +; RV32I-NEXT: srli a6, a3, 16 +; RV32I-NEXT: and t0, a3, a5 +; RV32I-NEXT: srli t0, t0, 8 +; RV32I-NEXT: srli t1, a3, 24 +; RV32I-NEXT: sb a3, 4(a2) +; RV32I-NEXT: sb t0, 5(a2) +; RV32I-NEXT: sb a6, 6(a2) +; RV32I-NEXT: sb t1, 7(a2) +; RV32I-NEXT: srli a3, a4, 16 +; RV32I-NEXT: and a6, a4, a5 +; RV32I-NEXT: srli a6, a6, 8 +; RV32I-NEXT: srli t0, a4, 24 ; RV32I-NEXT: sb a4, 8(a2) -; RV32I-NEXT: sb a5, 9(a2) -; RV32I-NEXT: sb t1, 10(a2) -; RV32I-NEXT: sb s2, 11(a2) -; RV32I-NEXT: and a4, a7, t4 -; RV32I-NEXT: srli a5, t0, 8 -; RV32I-NEXT: sb a6, 12(a2) -; RV32I-NEXT: sb a5, 13(a2) -; RV32I-NEXT: sb t5, 14(a2) -; RV32I-NEXT: sb s3, 15(a2) -; RV32I-NEXT: and a5, a0, t4 +; RV32I-NEXT: sb a6, 9(a2) +; RV32I-NEXT: sb a3, 10(a2) +; RV32I-NEXT: sb t0, 11(a2) +; RV32I-NEXT: srli a3, a7, 16 +; RV32I-NEXT: and a4, a7, a5 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a6, a7, 24 +; RV32I-NEXT: sb a7, 12(a2) +; RV32I-NEXT: sb a4, 13(a2) +; RV32I-NEXT: sb a3, 14(a2) +; RV32I-NEXT: sb a6, 15(a2) +; RV32I-NEXT: srli a3, t2, 16 +; RV32I-NEXT: and a4, t2, a5 ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a7, 16(a2) +; RV32I-NEXT: srli a6, t2, 24 +; RV32I-NEXT: sb t2, 16(a2) ; RV32I-NEXT: sb a4, 17(a2) -; RV32I-NEXT: sb s1, 18(a2) -; RV32I-NEXT: sb a3, 19(a2) -; RV32I-NEXT: and a3, t3, t4 -; RV32I-NEXT: and a4, a1, t4 -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: sb a3, 18(a2) +; RV32I-NEXT: sb a6, 19(a2) +; RV32I-NEXT: srli a3, a0, 16 +; RV32I-NEXT: and a4, a0, a5 ; RV32I-NEXT: srli a4, 
a4, 8 +; RV32I-NEXT: srli a6, a0, 24 ; RV32I-NEXT: sb a0, 20(a2) -; RV32I-NEXT: sb a5, 21(a2) -; RV32I-NEXT: sb s0, 22(a2) -; RV32I-NEXT: sb s5, 23(a2) +; RV32I-NEXT: sb a4, 21(a2) +; RV32I-NEXT: sb a3, 22(a2) +; RV32I-NEXT: sb a6, 23(a2) +; RV32I-NEXT: srli a0, t3, 16 +; RV32I-NEXT: and a3, t3, a5 +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: srli a4, t3, 24 ; RV32I-NEXT: sb t3, 24(a2) ; RV32I-NEXT: sb a3, 25(a2) -; RV32I-NEXT: sb s4, 26(a2) -; RV32I-NEXT: sb s6, 27(a2) +; RV32I-NEXT: sb a0, 26(a2) +; RV32I-NEXT: sb a4, 27(a2) +; RV32I-NEXT: srli a0, a1, 16 +; RV32I-NEXT: and a5, a1, a5 +; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: srli a3, a1, 24 ; RV32I-NEXT: sb a1, 28(a2) -; RV32I-NEXT: sb a4, 29(a2) -; RV32I-NEXT: sb s7, 30(a2) -; RV32I-NEXT: sb s8, 31(a2) -; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s7, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s10, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s11, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 80 +; RV32I-NEXT: sb a5, 29(a2) +; RV32I-NEXT: sb a0, 30(a2) +; RV32I-NEXT: sb a3, 31(a2) +; RV32I-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw 
s7, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s9, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s10, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s11, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB18_217: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s5, .LBB18_210 +; RV32I-NEXT: mv a6, t4 +; RV32I-NEXT: mv t4, t1 +; RV32I-NEXT: beq t0, s1, .LBB18_210 ; RV32I-NEXT: .LBB18_218: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s0, .LBB18_211 +; RV32I-NEXT: mv t4, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, s10, .LBB18_211 ; RV32I-NEXT: .LBB18_219: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s10, .LBB18_212 +; RV32I-NEXT: mv a6, t4 +; RV32I-NEXT: mv t4, t1 +; RV32I-NEXT: beq t0, s2, .LBB18_212 ; RV32I-NEXT: .LBB18_220: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s1, .LBB18_213 +; RV32I-NEXT: mv t4, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, s8, .LBB18_213 ; RV32I-NEXT: .LBB18_221: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beq t1, s8, .LBB18_214 +; RV32I-NEXT: mv a6, t4 +; RV32I-NEXT: beq t0, t6, .LBB18_214 ; RV32I-NEXT: .LBB18_222: -; RV32I-NEXT: mv t2, a3 -; RV32I-NEXT: bnez t0, .LBB18_215 +; RV32I-NEXT: mv t1, a6 +; RV32I-NEXT: bnez a5, .LBB18_215 ; RV32I-NEXT: j .LBB18_216 %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 @@ -10698,1201 +10619,1175 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: ashr_32bytes_wordOff: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -96 -; RV64I-NEXT: sd s0, 88(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 80(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 72(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 64(sp) # 8-byte Folded 
Spill -; RV64I-NEXT: sd s4, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s5, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s6, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s7, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s8, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s9, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s10, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s11, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: lbu a4, 1(a0) -; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: lbu a6, 3(a0) -; RV64I-NEXT: lbu a7, 4(a0) +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd s0, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a5, 3(a0) +; RV64I-NEXT: lbu a6, 2(a0) +; RV64I-NEXT: lbu a7, 7(a0) ; RV64I-NEXT: lbu t0, 5(a0) ; RV64I-NEXT: lbu t1, 6(a0) -; RV64I-NEXT: lbu t2, 7(a0) -; RV64I-NEXT: lbu t3, 8(a0) -; RV64I-NEXT: lbu t4, 9(a0) -; RV64I-NEXT: lbu t5, 10(a0) -; RV64I-NEXT: lbu t6, 11(a0) -; RV64I-NEXT: lbu s0, 12(a0) -; RV64I-NEXT: lbu s1, 13(a0) -; RV64I-NEXT: lbu s2, 14(a0) -; RV64I-NEXT: lbu s3, 15(a0) -; RV64I-NEXT: lbu s4, 16(a0) -; RV64I-NEXT: lbu s5, 17(a0) -; RV64I-NEXT: lbu s6, 18(a0) -; RV64I-NEXT: lbu s7, 19(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: lbu t2, 4(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a3, a4 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 8 ; RV64I-NEXT: slli t0, t0, 8 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: lbu s8, 20(a0) -; RV64I-NEXT: lbu s9, 21(a0) -; RV64I-NEXT: lbu s10, 22(a0) -; RV64I-NEXT: lbu s11, 23(a0) -; RV64I-NEXT: slli t2, t2, 8 +; RV64I-NEXT: or a3, a7, t1 +; RV64I-NEXT: lbu a7, 8(a0) +; 
RV64I-NEXT: lbu t1, 9(a0) +; RV64I-NEXT: lbu t3, 10(a0) +; RV64I-NEXT: lbu t4, 11(a0) +; RV64I-NEXT: lbu t5, 12(a0) +; RV64I-NEXT: lbu t6, 13(a0) +; RV64I-NEXT: lbu s0, 14(a0) +; RV64I-NEXT: lbu s1, 15(a0) +; RV64I-NEXT: or a6, t0, t2 +; RV64I-NEXT: slli a3, a3, 16 +; RV64I-NEXT: or a6, a3, a6 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, a7 ; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: slli t6, t6, 8 +; RV64I-NEXT: or a7, t4, t3 ; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or t1, t2, t1 -; RV64I-NEXT: or a4, t4, t3 -; RV64I-NEXT: or a6, t6, t5 -; RV64I-NEXT: or t0, s1, s0 -; RV64I-NEXT: lbu t5, 24(a0) -; RV64I-NEXT: lbu t6, 25(a0) -; RV64I-NEXT: lbu s0, 26(a0) -; RV64I-NEXT: lbu s1, 27(a0) -; RV64I-NEXT: slli s3, s3, 8 -; RV64I-NEXT: slli s5, s5, 8 -; RV64I-NEXT: slli s7, s7, 8 -; RV64I-NEXT: or t4, s3, s2 -; RV64I-NEXT: or t2, s5, s4 -; RV64I-NEXT: or t3, s7, s6 -; RV64I-NEXT: lbu s2, 28(a0) -; RV64I-NEXT: lbu s3, 29(a0) -; RV64I-NEXT: lbu s4, 30(a0) -; RV64I-NEXT: lbu a0, 31(a0) -; RV64I-NEXT: slli s9, s9, 8 -; RV64I-NEXT: slli s11, s11, 8 ; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s5, s9, s8 -; RV64I-NEXT: or s6, s11, s10 -; RV64I-NEXT: or t5, t6, t5 ; RV64I-NEXT: or s0, s1, s0 -; RV64I-NEXT: lbu t6, 0(a1) -; RV64I-NEXT: lbu s1, 1(a1) -; RV64I-NEXT: lbu s7, 2(a1) -; RV64I-NEXT: lbu s8, 3(a1) -; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: lbu a3, 16(a0) +; RV64I-NEXT: lbu t1, 17(a0) +; RV64I-NEXT: lbu t3, 18(a0) +; RV64I-NEXT: lbu t4, 19(a0) +; RV64I-NEXT: lbu s1, 20(a0) +; RV64I-NEXT: lbu s2, 21(a0) +; RV64I-NEXT: lbu s3, 22(a0) +; RV64I-NEXT: lbu s4, 23(a0) +; RV64I-NEXT: or t2, t6, t5 +; RV64I-NEXT: slli s0, s0, 16 +; RV64I-NEXT: or t2, s0, t2 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or a3, t1, a3 +; RV64I-NEXT: slli t4, t4, 8 +; RV64I-NEXT: or t1, t4, t3 +; RV64I-NEXT: slli s4, s4, 8 +; RV64I-NEXT: slli s2, s2, 8 +; RV64I-NEXT: or t3, s4, s3 +; RV64I-NEXT: or t4, s2, s1 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: 
slli t1, t1, 16 +; RV64I-NEXT: or t3, t3, t4 +; RV64I-NEXT: lbu t4, 24(a0) +; RV64I-NEXT: lbu t5, 25(a0) +; RV64I-NEXT: lbu t6, 26(a0) +; RV64I-NEXT: lbu s0, 27(a0) +; RV64I-NEXT: or a3, t1, a3 +; RV64I-NEXT: slli t3, t3, 32 +; RV64I-NEXT: lbu t1, 28(a0) +; RV64I-NEXT: lbu s1, 29(a0) +; RV64I-NEXT: lbu s2, 30(a0) +; RV64I-NEXT: lbu a0, 31(a0) +; RV64I-NEXT: or a3, t3, a3 +; RV64I-NEXT: slli s0, s0, 8 +; RV64I-NEXT: slli t5, t5, 8 +; RV64I-NEXT: or t3, s0, t6 +; RV64I-NEXT: or t4, t5, t4 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: or t3, t3, t4 ; RV64I-NEXT: slli a0, a0, 8 ; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s2, s3, s2 -; RV64I-NEXT: or s3, a0, s4 -; RV64I-NEXT: or t6, s1, t6 -; RV64I-NEXT: lbu a0, 4(a1) -; RV64I-NEXT: lbu s1, 5(a1) -; RV64I-NEXT: lbu s4, 6(a1) +; RV64I-NEXT: or a0, a0, s2 +; RV64I-NEXT: or t1, s1, t1 +; RV64I-NEXT: slli a0, a0, 16 +; RV64I-NEXT: lbu t4, 0(a1) +; RV64I-NEXT: lbu t5, 1(a1) +; RV64I-NEXT: lbu t6, 2(a1) +; RV64I-NEXT: lbu s0, 3(a1) +; RV64I-NEXT: lbu s1, 4(a1) +; RV64I-NEXT: lbu s2, 5(a1) +; RV64I-NEXT: lbu s3, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli s8, s8, 8 -; RV64I-NEXT: or s7, s8, s7 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s1, s1, a0 +; RV64I-NEXT: or t1, a0, t1 +; RV64I-NEXT: slli t5, t5, 8 +; RV64I-NEXT: or t4, t5, t4 +; RV64I-NEXT: slli s0, s0, 8 +; RV64I-NEXT: or t5, s0, t6 +; RV64I-NEXT: slli s2, s2, 8 +; RV64I-NEXT: or t6, s2, s1 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or s4, a1, s4 -; RV64I-NEXT: slli t1, t1, 16 -; RV64I-NEXT: or a1, t1, a7 -; RV64I-NEXT: slli t4, t4, 16 -; RV64I-NEXT: or a0, t4, t0 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: or a7, t3, t2 -; RV64I-NEXT: slli s6, s6, 16 -; RV64I-NEXT: or t1, s6, s5 +; RV64I-NEXT: or s0, a1, s3 +; RV64I-NEXT: slli a1, a5, 16 +; RV64I-NEXT: slli a0, a7, 16 ; RV64I-NEXT: slli s0, s0, 16 -; RV64I-NEXT: or t4, s0, t5 -; RV64I-NEXT: slli s3, s3, 16 -; RV64I-NEXT: or t5, s3, s2 -; RV64I-NEXT: slli s7, s7, 16 -; RV64I-NEXT: or t6, s7, t6 -; 
RV64I-NEXT: slli s4, s4, 16 -; RV64I-NEXT: or s0, s4, s1 -; RV64I-NEXT: li t0, 64 -; RV64I-NEXT: slli t3, a5, 16 -; RV64I-NEXT: slli t2, a6, 16 +; RV64I-NEXT: slli t5, t5, 16 +; RV64I-NEXT: or a5, s0, t6 +; RV64I-NEXT: or a7, t5, t4 +; RV64I-NEXT: slli a5, a5, 32 ; RV64I-NEXT: slli t1, t1, 32 -; RV64I-NEXT: slli t5, t5, 32 -; RV64I-NEXT: slli s0, s0, 32 -; RV64I-NEXT: or a7, t1, a7 -; RV64I-NEXT: or a5, t5, t4 -; RV64I-NEXT: or a6, s0, t6 -; RV64I-NEXT: slli a6, a6, 5 -; RV64I-NEXT: sub t1, a6, t0 -; RV64I-NEXT: neg t5, a6 -; RV64I-NEXT: sll t4, a5, t5 -; RV64I-NEXT: bltu a6, t0, .LBB19_2 +; RV64I-NEXT: or a7, a5, a7 +; RV64I-NEXT: or a5, t1, t3 +; RV64I-NEXT: slli a7, a7, 5 +; RV64I-NEXT: li t1, 64 +; RV64I-NEXT: neg t3, a7 +; RV64I-NEXT: sub t4, a7, t1 +; RV64I-NEXT: sll t5, a5, t3 +; RV64I-NEXT: bltu a7, t1, .LBB19_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: sra t6, a5, t1 +; RV64I-NEXT: sra t6, a5, t4 ; RV64I-NEXT: j .LBB19_3 ; RV64I-NEXT: .LBB19_2: -; RV64I-NEXT: srl t6, a7, a6 -; RV64I-NEXT: or t6, t6, t4 +; RV64I-NEXT: srl t6, a3, a7 +; RV64I-NEXT: or t6, t6, t5 ; RV64I-NEXT: .LBB19_3: -; RV64I-NEXT: or a3, t3, a3 -; RV64I-NEXT: slli t3, a1, 32 -; RV64I-NEXT: or t2, t2, a4 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: mv a1, a7 -; RV64I-NEXT: beqz a6, .LBB19_5 +; RV64I-NEXT: or a4, a1, a4 +; RV64I-NEXT: slli a6, a6, 32 +; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: slli t2, t2, 32 +; RV64I-NEXT: mv a1, a3 +; RV64I-NEXT: beqz a7, .LBB19_5 ; RV64I-NEXT: # %bb.4: ; RV64I-NEXT: mv a1, t6 ; RV64I-NEXT: .LBB19_5: -; RV64I-NEXT: or a4, t3, a3 -; RV64I-NEXT: or a3, a0, t2 -; RV64I-NEXT: bltu a6, t0, .LBB19_7 +; RV64I-NEXT: or a6, a6, a4 +; RV64I-NEXT: or a4, t2, a0 +; RV64I-NEXT: bltu a7, t1, .LBB19_7 ; RV64I-NEXT: # %bb.6: ; RV64I-NEXT: srai a0, a5, 63 -; RV64I-NEXT: srl t3, a3, t1 +; RV64I-NEXT: srl t4, a4, t4 ; RV64I-NEXT: j .LBB19_8 ; RV64I-NEXT: .LBB19_7: -; RV64I-NEXT: sra a0, a5, a6 -; RV64I-NEXT: srl t1, a4, a6 -; RV64I-NEXT: sll t2, a3, t5 -; RV64I-NEXT: or t3, 
t1, t2 +; RV64I-NEXT: srl t0, a6, a7 +; RV64I-NEXT: sll t2, a4, t3 +; RV64I-NEXT: sra a0, a5, a7 +; RV64I-NEXT: or t4, t0, t2 ; RV64I-NEXT: .LBB19_8: -; RV64I-NEXT: li t1, 128 -; RV64I-NEXT: mv t2, a4 -; RV64I-NEXT: beqz a6, .LBB19_10 +; RV64I-NEXT: li t0, 128 +; RV64I-NEXT: mv t2, a6 +; RV64I-NEXT: beqz a7, .LBB19_10 ; RV64I-NEXT: # %bb.9: -; RV64I-NEXT: mv t2, t3 +; RV64I-NEXT: mv t2, t4 ; RV64I-NEXT: .LBB19_10: -; RV64I-NEXT: sub t6, t1, a6 -; RV64I-NEXT: bltu a6, t0, .LBB19_13 +; RV64I-NEXT: sub t6, t0, a7 +; RV64I-NEXT: bltu a7, t1, .LBB19_13 ; RV64I-NEXT: # %bb.11: -; RV64I-NEXT: li t3, 0 -; RV64I-NEXT: bgeu t6, t0, .LBB19_14 +; RV64I-NEXT: li t4, 0 +; RV64I-NEXT: bgeu t6, t1, .LBB19_14 ; RV64I-NEXT: .LBB19_12: -; RV64I-NEXT: sll t5, a7, t5 ; RV64I-NEXT: neg s0, t6 -; RV64I-NEXT: srl s0, a7, s0 -; RV64I-NEXT: or s1, s0, t4 +; RV64I-NEXT: srl s0, a3, s0 +; RV64I-NEXT: sll t3, a3, t3 +; RV64I-NEXT: or s1, s0, t5 ; RV64I-NEXT: j .LBB19_15 ; RV64I-NEXT: .LBB19_13: -; RV64I-NEXT: srl t3, a3, a6 -; RV64I-NEXT: bltu t6, t0, .LBB19_12 +; RV64I-NEXT: srl t4, a4, a7 +; RV64I-NEXT: bltu t6, t1, .LBB19_12 ; RV64I-NEXT: .LBB19_14: -; RV64I-NEXT: li t5, 0 -; RV64I-NEXT: sub t4, t6, t0 -; RV64I-NEXT: sll s1, a7, t4 +; RV64I-NEXT: li t3, 0 +; RV64I-NEXT: sub t5, t6, t1 +; RV64I-NEXT: sll s1, a3, t5 ; RV64I-NEXT: .LBB19_15: -; RV64I-NEXT: sub s0, a6, t1 -; RV64I-NEXT: mv t4, a5 +; RV64I-NEXT: sub s0, a7, t0 +; RV64I-NEXT: mv t5, a5 ; RV64I-NEXT: beqz t6, .LBB19_17 ; RV64I-NEXT: # %bb.16: -; RV64I-NEXT: mv t4, s1 +; RV64I-NEXT: mv t5, s1 ; RV64I-NEXT: .LBB19_17: -; RV64I-NEXT: bltu s0, t0, .LBB19_19 +; RV64I-NEXT: bltu s0, t1, .LBB19_19 ; RV64I-NEXT: # %bb.18: -; RV64I-NEXT: sub t6, s0, t0 +; RV64I-NEXT: sub t6, s0, t1 ; RV64I-NEXT: sra t6, a5, t6 ; RV64I-NEXT: bnez s0, .LBB19_20 ; RV64I-NEXT: j .LBB19_21 ; RV64I-NEXT: .LBB19_19: -; RV64I-NEXT: srl t6, a7, s0 -; RV64I-NEXT: neg s1, s0 -; RV64I-NEXT: sll s1, a5, s1 -; RV64I-NEXT: or t6, t6, s1 +; RV64I-NEXT: neg t6, s0 +; 
RV64I-NEXT: srl s1, a3, s0 +; RV64I-NEXT: sll t6, a5, t6 +; RV64I-NEXT: or t6, s1, t6 ; RV64I-NEXT: beqz s0, .LBB19_21 ; RV64I-NEXT: .LBB19_20: -; RV64I-NEXT: mv a7, t6 +; RV64I-NEXT: mv a3, t6 ; RV64I-NEXT: .LBB19_21: -; RV64I-NEXT: bltu s0, t0, .LBB19_23 +; RV64I-NEXT: bltu s0, t1, .LBB19_23 ; RV64I-NEXT: # %bb.22: -; RV64I-NEXT: srai t0, a5, 63 -; RV64I-NEXT: bltu a6, t1, .LBB19_24 +; RV64I-NEXT: srai t1, a5, 63 +; RV64I-NEXT: bltu a7, t0, .LBB19_24 ; RV64I-NEXT: j .LBB19_25 ; RV64I-NEXT: .LBB19_23: -; RV64I-NEXT: sra t0, a5, s0 -; RV64I-NEXT: bgeu a6, t1, .LBB19_25 +; RV64I-NEXT: sra t1, a5, s0 +; RV64I-NEXT: bgeu a7, t0, .LBB19_25 ; RV64I-NEXT: .LBB19_24: -; RV64I-NEXT: or a7, t2, t5 -; RV64I-NEXT: or t0, t3, t4 +; RV64I-NEXT: or a3, t2, t3 +; RV64I-NEXT: or t1, t4, t5 ; RV64I-NEXT: .LBB19_25: -; RV64I-NEXT: bnez a6, .LBB19_29 +; RV64I-NEXT: bnez a7, .LBB19_29 ; RV64I-NEXT: # %bb.26: -; RV64I-NEXT: bltu a6, t1, .LBB19_28 +; RV64I-NEXT: bltu a7, t0, .LBB19_28 ; RV64I-NEXT: .LBB19_27: ; RV64I-NEXT: srai a1, a5, 63 ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: .LBB19_28: -; RV64I-NEXT: srli a5, a4, 32 -; RV64I-NEXT: srliw a6, a4, 16 -; RV64I-NEXT: lui t2, 16 -; RV64I-NEXT: srliw t1, a4, 24 -; RV64I-NEXT: srli t0, a4, 48 -; RV64I-NEXT: srli t5, a4, 56 -; RV64I-NEXT: srli a7, a3, 32 -; RV64I-NEXT: srliw t4, a3, 16 -; RV64I-NEXT: srliw s0, a3, 24 -; RV64I-NEXT: srli t6, a3, 48 -; RV64I-NEXT: srli s3, a3, 56 -; RV64I-NEXT: srli t3, a1, 32 -; RV64I-NEXT: srliw s2, a1, 16 -; RV64I-NEXT: srliw s6, a1, 24 -; RV64I-NEXT: srli s4, a1, 48 -; RV64I-NEXT: srli s7, a1, 56 -; RV64I-NEXT: srli s1, a0, 32 -; RV64I-NEXT: srliw s5, a0, 16 -; RV64I-NEXT: srliw s8, a0, 24 -; RV64I-NEXT: srli s9, a0, 48 -; RV64I-NEXT: srli s10, a0, 56 -; RV64I-NEXT: addi t2, t2, -1 -; RV64I-NEXT: and s11, a4, t2 -; RV64I-NEXT: srli s11, s11, 8 -; RV64I-NEXT: sb a4, 0(a2) -; RV64I-NEXT: sb s11, 1(a2) -; RV64I-NEXT: sb a6, 2(a2) -; RV64I-NEXT: sb t1, 3(a2) -; RV64I-NEXT: and a4, a5, t2 -; RV64I-NEXT: srli a4, 
a4, 8 +; RV64I-NEXT: srli a5, a6, 32 +; RV64I-NEXT: lui a3, 16 +; RV64I-NEXT: srliw a7, a6, 16 +; RV64I-NEXT: addi a3, a3, -1 +; RV64I-NEXT: srliw t0, a6, 24 +; RV64I-NEXT: and t1, a6, a3 +; RV64I-NEXT: srli t1, t1, 8 +; RV64I-NEXT: sb a6, 0(a2) +; RV64I-NEXT: sb t1, 1(a2) +; RV64I-NEXT: sb a7, 2(a2) +; RV64I-NEXT: sb t0, 3(a2) +; RV64I-NEXT: and a7, a5, a3 +; RV64I-NEXT: srli t0, a6, 48 +; RV64I-NEXT: srli a7, a7, 8 +; RV64I-NEXT: srli a6, a6, 56 ; RV64I-NEXT: sb a5, 4(a2) -; RV64I-NEXT: sb a4, 5(a2) +; RV64I-NEXT: sb a7, 5(a2) ; RV64I-NEXT: sb t0, 6(a2) -; RV64I-NEXT: sb t5, 7(a2) -; RV64I-NEXT: and a4, a3, t2 -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a3, 8(a2) -; RV64I-NEXT: sb a4, 9(a2) -; RV64I-NEXT: sb t4, 10(a2) -; RV64I-NEXT: sb s0, 11(a2) -; RV64I-NEXT: and a3, a7, t2 -; RV64I-NEXT: srli a3, a3, 8 -; RV64I-NEXT: sb a7, 12(a2) -; RV64I-NEXT: sb a3, 13(a2) -; RV64I-NEXT: sb t6, 14(a2) -; RV64I-NEXT: sb s3, 15(a2) -; RV64I-NEXT: and a3, a1, t2 -; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: sb a6, 7(a2) +; RV64I-NEXT: srli a5, a4, 32 +; RV64I-NEXT: srliw a6, a4, 16 +; RV64I-NEXT: and a7, a4, a3 +; RV64I-NEXT: srli a7, a7, 8 +; RV64I-NEXT: srliw t0, a4, 24 +; RV64I-NEXT: sb a4, 8(a2) +; RV64I-NEXT: sb a7, 9(a2) +; RV64I-NEXT: sb a6, 10(a2) +; RV64I-NEXT: sb t0, 11(a2) +; RV64I-NEXT: srli a6, a4, 48 +; RV64I-NEXT: and a7, a5, a3 +; RV64I-NEXT: srli a7, a7, 8 +; RV64I-NEXT: srli a4, a4, 56 +; RV64I-NEXT: sb a5, 12(a2) +; RV64I-NEXT: sb a7, 13(a2) +; RV64I-NEXT: sb a6, 14(a2) +; RV64I-NEXT: sb a4, 15(a2) +; RV64I-NEXT: srli a4, a1, 32 +; RV64I-NEXT: and a5, a1, a3 +; RV64I-NEXT: srliw a6, a1, 16 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srliw a7, a1, 24 ; RV64I-NEXT: sb a1, 16(a2) -; RV64I-NEXT: sb a3, 17(a2) -; RV64I-NEXT: sb s2, 18(a2) -; RV64I-NEXT: sb s6, 19(a2) -; RV64I-NEXT: and a1, t3, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb t3, 20(a2) -; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: sb s4, 22(a2) -; RV64I-NEXT: sb s7, 23(a2) -; RV64I-NEXT: and 
a1, a0, t2 -; RV64I-NEXT: and a3, s1, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: sb a5, 17(a2) +; RV64I-NEXT: sb a6, 18(a2) +; RV64I-NEXT: sb a7, 19(a2) +; RV64I-NEXT: and a5, a4, a3 +; RV64I-NEXT: srli a6, a1, 48 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srli a1, a1, 56 +; RV64I-NEXT: sb a4, 20(a2) +; RV64I-NEXT: sb a5, 21(a2) +; RV64I-NEXT: sb a6, 22(a2) +; RV64I-NEXT: sb a1, 23(a2) +; RV64I-NEXT: srli a1, a0, 32 +; RV64I-NEXT: srliw a4, a0, 16 +; RV64I-NEXT: and a5, a0, a3 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srliw a6, a0, 24 ; RV64I-NEXT: sb a0, 24(a2) -; RV64I-NEXT: sb a1, 25(a2) -; RV64I-NEXT: sb s5, 26(a2) -; RV64I-NEXT: sb s8, 27(a2) -; RV64I-NEXT: sb s1, 28(a2) +; RV64I-NEXT: sb a5, 25(a2) +; RV64I-NEXT: sb a4, 26(a2) +; RV64I-NEXT: sb a6, 27(a2) +; RV64I-NEXT: srli a4, a0, 48 +; RV64I-NEXT: and a3, a1, a3 +; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: sb a1, 28(a2) ; RV64I-NEXT: sb a3, 29(a2) -; RV64I-NEXT: sb s9, 30(a2) -; RV64I-NEXT: sb s10, 31(a2) -; RV64I-NEXT: ld s0, 88(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 80(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 72(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 64(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 56(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s5, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s6, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s7, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s8, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s9, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s10, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s11, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 96 +; RV64I-NEXT: sb a4, 30(a2) +; RV64I-NEXT: sb a0, 31(a2) +; RV64I-NEXT: ld s0, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 8(sp) 
# 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB19_29: -; RV64I-NEXT: mv a4, a7 -; RV64I-NEXT: mv a3, t0 -; RV64I-NEXT: bgeu a6, t1, .LBB19_27 +; RV64I-NEXT: mv a6, a3 +; RV64I-NEXT: mv a4, t1 +; RV64I-NEXT: bgeu a7, t0, .LBB19_27 ; RV64I-NEXT: j .LBB19_28 ; ; RV32I-LABEL: ashr_32bytes_wordOff: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -80 -; RV32I-NEXT: sw ra, 76(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 72(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 68(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 64(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 60(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 52(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 48(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s10, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s11, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: li a5, 0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: lbu a4, 1(a0) -; RV32I-NEXT: lbu t0, 2(a0) -; RV32I-NEXT: lbu t1, 3(a0) -; RV32I-NEXT: lbu t2, 4(a0) -; RV32I-NEXT: lbu t3, 5(a0) -; RV32I-NEXT: lbu t4, 6(a0) -; RV32I-NEXT: lbu t5, 7(a0) -; RV32I-NEXT: lbu t6, 8(a0) -; RV32I-NEXT: lbu s0, 9(a0) -; RV32I-NEXT: lbu s1, 10(a0) -; RV32I-NEXT: lbu s2, 11(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a3 -; RV32I-NEXT: lbu a7, 13(a0) -; RV32I-NEXT: lbu a6, 14(a0) -; RV32I-NEXT: lbu a3, 15(a0) -; RV32I-NEXT: lbu s3, 28(a0) +; RV32I-NEXT: addi sp, sp, -64 +; RV32I-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 52(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 48(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 36(sp) # 4-byte Folded Spill +; 
RV32I-NEXT: sw s6, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s11, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 3(a0) +; RV32I-NEXT: lbu a5, 0(a0) +; RV32I-NEXT: lbu a7, 2(a0) +; RV32I-NEXT: lbu t0, 4(a0) +; RV32I-NEXT: lbu t1, 5(a0) +; RV32I-NEXT: lbu t2, 6(a0) +; RV32I-NEXT: lbu t3, 7(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: slli t4, a4, 8 +; RV32I-NEXT: or a4, a3, a5 +; RV32I-NEXT: or t6, t4, a7 ; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: lbu a3, 9(a0) ; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: lbu a5, 8(a0) +; RV32I-NEXT: lbu t4, 10(a0) +; RV32I-NEXT: lbu t5, 11(a0) +; RV32I-NEXT: or a7, t1, t0 +; RV32I-NEXT: or t0, t3, t2 +; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t1, a3, 8 +; RV32I-NEXT: or a3, t0, a7 +; RV32I-NEXT: or a7, t1, a5 ; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: or t0, t1, t0 -; RV32I-NEXT: or t1, t3, t2 -; RV32I-NEXT: or t4, t5, t4 -; RV32I-NEXT: lbu t2, 29(a0) -; RV32I-NEXT: lbu t3, 30(a0) -; RV32I-NEXT: lbu t5, 31(a0) -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: slli s2, s2, 8 -; RV32I-NEXT: slli s4, t2, 8 -; RV32I-NEXT: or t2, s0, t6 -; RV32I-NEXT: or s0, s2, s1 -; RV32I-NEXT: or s1, s4, s3 -; RV32I-NEXT: lbu t6, 0(a1) -; RV32I-NEXT: lbu s2, 1(a1) -; RV32I-NEXT: lbu s3, 2(a1) +; RV32I-NEXT: lbu t2, 13(a0) +; RV32I-NEXT: lbu t1, 14(a0) +; RV32I-NEXT: lbu a5, 15(a0) +; RV32I-NEXT: lbu t0, 28(a0) +; RV32I-NEXT: lbu t3, 29(a0) +; RV32I-NEXT: lbu s0, 0(a1) +; RV32I-NEXT: lbu s1, 1(a1) +; RV32I-NEXT: lbu s2, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: or s4, t5, t3 -; RV32I-NEXT: slli s2, s2, 8 -; RV32I-NEXT: or s2, s2, t6 +; RV32I-NEXT: or t4, t5, t4 +; RV32I-NEXT: lbu s3, 30(a0) +; RV32I-NEXT: lbu s4, 31(a0) +; RV32I-NEXT: slli t3, 
t3, 8 +; RV32I-NEXT: or t0, t3, t0 +; RV32I-NEXT: slli s1, s1, 8 +; RV32I-NEXT: or s0, s1, s0 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a1, s3 -; RV32I-NEXT: slli t3, a3, 8 -; RV32I-NEXT: slli t6, t0, 16 -; RV32I-NEXT: slli t4, t4, 16 -; RV32I-NEXT: slli t5, s0, 16 -; RV32I-NEXT: slli s4, s4, 16 -; RV32I-NEXT: slli a3, a1, 16 -; RV32I-NEXT: or s5, t4, t1 -; RV32I-NEXT: or a1, s4, s1 -; RV32I-NEXT: or t0, a3, s2 -; RV32I-NEXT: slli t0, t0, 5 -; RV32I-NEXT: srli t1, t0, 5 -; RV32I-NEXT: andi t4, t0, 31 -; RV32I-NEXT: neg a3, t4 +; RV32I-NEXT: or a1, a1, s2 +; RV32I-NEXT: slli t6, t6, 16 +; RV32I-NEXT: slli t3, t4, 16 +; RV32I-NEXT: slli t5, a5, 8 +; RV32I-NEXT: slli s4, s4, 8 +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: or a5, s4, s3 +; RV32I-NEXT: or a1, a1, s0 +; RV32I-NEXT: slli t4, a5, 16 +; RV32I-NEXT: slli a5, a1, 5 +; RV32I-NEXT: or a1, t4, t0 +; RV32I-NEXT: andi t4, a5, 31 +; RV32I-NEXT: srli t0, a5, 5 +; RV32I-NEXT: neg s3, t4 ; RV32I-NEXT: beqz t4, .LBB19_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sll a5, s5, a3 +; RV32I-NEXT: sll a6, a3, s3 ; RV32I-NEXT: .LBB19_2: -; RV32I-NEXT: or s10, t6, a4 -; RV32I-NEXT: lbu t6, 12(a0) -; RV32I-NEXT: lbu s0, 19(a0) -; RV32I-NEXT: slli s1, a7, 8 -; RV32I-NEXT: or a6, t3, a6 -; RV32I-NEXT: or a4, t5, t2 -; RV32I-NEXT: srai t2, a1, 31 -; RV32I-NEXT: beqz t1, .LBB19_4 +; RV32I-NEXT: lbu s0, 12(a0) +; RV32I-NEXT: lbu s1, 19(a0) +; RV32I-NEXT: or s5, t6, a4 +; RV32I-NEXT: slli t6, t2, 8 +; RV32I-NEXT: or t5, t5, t1 +; RV32I-NEXT: or a4, t3, a7 +; RV32I-NEXT: srai t1, a1, 31 +; RV32I-NEXT: beqz t0, .LBB19_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: mv a6, t1 ; RV32I-NEXT: j .LBB19_5 ; RV32I-NEXT: .LBB19_4: -; RV32I-NEXT: srl a7, s10, t0 -; RV32I-NEXT: or a5, a7, a5 +; RV32I-NEXT: srl a7, s5, a5 +; RV32I-NEXT: or a6, a7, a6 ; RV32I-NEXT: .LBB19_5: -; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: lbu s3, 17(a0) -; RV32I-NEXT: lbu t3, 18(a0) -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: or s4, s1, t6 -; 
RV32I-NEXT: slli a6, a6, 16 -; RV32I-NEXT: li s6, 1 -; RV32I-NEXT: sll s2, a4, a3 +; RV32I-NEXT: li t2, 0 +; RV32I-NEXT: lbu t3, 17(a0) +; RV32I-NEXT: lbu a7, 18(a0) +; RV32I-NEXT: slli s4, s1, 8 +; RV32I-NEXT: or s6, t6, s0 +; RV32I-NEXT: slli s7, t5, 16 +; RV32I-NEXT: li s8, 1 +; RV32I-NEXT: sll s2, a4, s3 ; RV32I-NEXT: beqz t4, .LBB19_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: mv a7, s2 +; RV32I-NEXT: mv t2, s2 ; RV32I-NEXT: .LBB19_7: ; RV32I-NEXT: lbu t5, 16(a0) ; RV32I-NEXT: lbu t6, 23(a0) -; RV32I-NEXT: slli s1, s3, 8 -; RV32I-NEXT: or s0, s0, t3 -; RV32I-NEXT: srl s3, s5, t0 -; RV32I-NEXT: or a6, a6, s4 -; RV32I-NEXT: bne t1, s6, .LBB19_9 +; RV32I-NEXT: slli s1, t3, 8 +; RV32I-NEXT: or s0, s4, a7 +; RV32I-NEXT: srl s4, a3, a5 +; RV32I-NEXT: or a7, s7, s6 +; RV32I-NEXT: bne t0, s8, .LBB19_9 ; RV32I-NEXT: # %bb.8: -; RV32I-NEXT: or a5, s3, a7 +; RV32I-NEXT: or a6, s4, t2 ; RV32I-NEXT: .LBB19_9: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: lbu s6, 21(a0) -; RV32I-NEXT: lbu a7, 22(a0) -; RV32I-NEXT: slli s4, t6, 8 -; RV32I-NEXT: or s7, s1, t5 -; RV32I-NEXT: slli s8, s0, 16 -; RV32I-NEXT: li s9, 2 -; RV32I-NEXT: sll s0, a6, a3 +; RV32I-NEXT: lbu s7, 21(a0) +; RV32I-NEXT: lbu t2, 22(a0) +; RV32I-NEXT: slli s6, t6, 8 +; RV32I-NEXT: or s8, s1, t5 +; RV32I-NEXT: slli s9, s0, 16 +; RV32I-NEXT: li s10, 2 +; RV32I-NEXT: sll s0, a7, s3 ; RV32I-NEXT: beqz t4, .LBB19_11 ; RV32I-NEXT: # %bb.10: ; RV32I-NEXT: mv t3, s0 ; RV32I-NEXT: .LBB19_11: ; RV32I-NEXT: lbu t5, 20(a0) ; RV32I-NEXT: lbu t6, 27(a0) -; RV32I-NEXT: slli s6, s6, 8 -; RV32I-NEXT: or s4, s4, a7 -; RV32I-NEXT: srl s1, a4, t0 -; RV32I-NEXT: or a7, s8, s7 -; RV32I-NEXT: bne t1, s9, .LBB19_13 +; RV32I-NEXT: slli s7, s7, 8 +; RV32I-NEXT: or s6, s6, t2 +; RV32I-NEXT: srl s1, a4, a5 +; RV32I-NEXT: or t2, s9, s8 +; RV32I-NEXT: bne t0, s10, .LBB19_13 ; RV32I-NEXT: # %bb.12: -; RV32I-NEXT: or a5, s1, t3 +; RV32I-NEXT: or a6, s1, t3 ; RV32I-NEXT: .LBB19_13: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: lbu s8, 25(a0) -; RV32I-NEXT: lbu s7, 
26(a0) +; RV32I-NEXT: lbu s9, 25(a0) +; RV32I-NEXT: lbu s8, 26(a0) ; RV32I-NEXT: slli t6, t6, 8 -; RV32I-NEXT: or s6, s6, t5 -; RV32I-NEXT: slli s9, s4, 16 -; RV32I-NEXT: li s11, 3 -; RV32I-NEXT: sll t5, a7, a3 +; RV32I-NEXT: or s7, s7, t5 +; RV32I-NEXT: slli s10, s6, 16 +; RV32I-NEXT: sll t5, t2, s3 ; RV32I-NEXT: beqz t4, .LBB19_15 ; RV32I-NEXT: # %bb.14: ; RV32I-NEXT: mv t3, t5 ; RV32I-NEXT: .LBB19_15: -; RV32I-NEXT: lbu s4, 24(a0) -; RV32I-NEXT: slli s8, s8, 8 -; RV32I-NEXT: or s7, t6, s7 -; RV32I-NEXT: srl t6, a6, t0 -; RV32I-NEXT: or a0, s9, s6 -; RV32I-NEXT: sw s5, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s10, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: bne t1, s11, .LBB19_17 +; RV32I-NEXT: lbu s6, 24(a0) +; RV32I-NEXT: slli s9, s9, 8 +; RV32I-NEXT: or s8, t6, s8 +; RV32I-NEXT: srl t6, a7, a5 +; RV32I-NEXT: or a0, s10, s7 +; RV32I-NEXT: li s7, 3 +; RV32I-NEXT: bne t0, s7, .LBB19_17 ; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: or a5, t6, t3 +; RV32I-NEXT: or a6, t6, t3 ; RV32I-NEXT: .LBB19_17: -; RV32I-NEXT: li s6, 0 -; RV32I-NEXT: or t3, s8, s4 -; RV32I-NEXT: slli s7, s7, 16 -; RV32I-NEXT: li s10, 4 -; RV32I-NEXT: sll s11, a0, a3 +; RV32I-NEXT: li s7, 0 +; RV32I-NEXT: or t3, s9, s6 +; RV32I-NEXT: slli s8, s8, 16 +; RV32I-NEXT: li s6, 4 +; RV32I-NEXT: sll s9, a0, s3 +; RV32I-NEXT: sw s9, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: beqz t4, .LBB19_19 ; RV32I-NEXT: # %bb.18: -; RV32I-NEXT: mv s6, s11 +; RV32I-NEXT: lw s7, 0(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB19_19: -; RV32I-NEXT: srl s4, a7, t0 -; RV32I-NEXT: or t3, s7, t3 -; RV32I-NEXT: sw s4, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: bne t1, s10, .LBB19_21 +; RV32I-NEXT: srl ra, t2, a5 +; RV32I-NEXT: or t3, s8, t3 +; RV32I-NEXT: bne t0, s6, .LBB19_21 ; RV32I-NEXT: # %bb.20: -; RV32I-NEXT: or a5, s4, s6 +; RV32I-NEXT: or a6, ra, s7 ; RV32I-NEXT: .LBB19_21: -; RV32I-NEXT: li s4, 0 -; RV32I-NEXT: li s5, 5 -; RV32I-NEXT: sll s6, t3, a3 -; RV32I-NEXT: sw s6, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: li 
s6, 0 +; RV32I-NEXT: li s8, 5 +; RV32I-NEXT: sll s7, t3, s3 +; RV32I-NEXT: sw s7, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: beqz t4, .LBB19_23 ; RV32I-NEXT: # %bb.22: -; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB19_23: -; RV32I-NEXT: srl s6, a0, t0 -; RV32I-NEXT: beq t1, s5, .LBB19_25 +; RV32I-NEXT: srl s7, a0, a5 +; RV32I-NEXT: beq t0, s8, .LBB19_25 ; RV32I-NEXT: # %bb.24: -; RV32I-NEXT: mv ra, s6 +; RV32I-NEXT: mv s11, s7 ; RV32I-NEXT: j .LBB19_26 ; RV32I-NEXT: .LBB19_25: -; RV32I-NEXT: mv ra, s6 -; RV32I-NEXT: or a5, s6, s4 +; RV32I-NEXT: mv s11, s7 +; RV32I-NEXT: or a6, s7, s6 ; RV32I-NEXT: .LBB19_26: -; RV32I-NEXT: li s4, 0 -; RV32I-NEXT: li s8, 6 -; RV32I-NEXT: sll s7, a1, a3 +; RV32I-NEXT: li s6, 0 +; RV32I-NEXT: li s9, 6 +; RV32I-NEXT: sll s10, a1, s3 +; RV32I-NEXT: sw s10, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: beqz t4, .LBB19_28 ; RV32I-NEXT: # %bb.27: -; RV32I-NEXT: mv s4, s7 +; RV32I-NEXT: lw s6, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB19_28: -; RV32I-NEXT: srl s5, t3, t0 -; RV32I-NEXT: beq t1, s8, .LBB19_30 +; RV32I-NEXT: mv s10, a3 +; RV32I-NEXT: srl s7, t3, a5 +; RV32I-NEXT: beq t0, s9, .LBB19_30 ; RV32I-NEXT: # %bb.29: -; RV32I-NEXT: mv s9, s5 +; RV32I-NEXT: mv s9, s7 +; RV32I-NEXT: mv a3, s5 ; RV32I-NEXT: j .LBB19_31 ; RV32I-NEXT: .LBB19_30: -; RV32I-NEXT: mv s9, s5 -; RV32I-NEXT: or a5, s5, s4 +; RV32I-NEXT: mv a3, s5 +; RV32I-NEXT: mv s9, s7 +; RV32I-NEXT: or a6, s7, s6 ; RV32I-NEXT: .LBB19_31: ; RV32I-NEXT: li s5, 0 -; RV32I-NEXT: li s6, 7 -; RV32I-NEXT: sll s4, t2, a3 +; RV32I-NEXT: sll s3, t1, s3 ; RV32I-NEXT: beqz t4, .LBB19_33 ; RV32I-NEXT: # %bb.32: -; RV32I-NEXT: mv s5, s4 +; RV32I-NEXT: mv s5, s3 ; RV32I-NEXT: .LBB19_33: -; RV32I-NEXT: srl a3, a1, t0 -; RV32I-NEXT: bne t1, s6, .LBB19_35 +; RV32I-NEXT: srl s6, a1, a5 +; RV32I-NEXT: li s7, 7 +; RV32I-NEXT: bne t0, s7, .LBB19_35 ; RV32I-NEXT: # %bb.34: -; RV32I-NEXT: or a5, a3, s5 +; RV32I-NEXT: or a6, s6, 
s5 ; RV32I-NEXT: .LBB19_35: -; RV32I-NEXT: li s5, 3 -; RV32I-NEXT: mv s6, a3 -; RV32I-NEXT: bnez t0, .LBB19_39 +; RV32I-NEXT: mv s5, a3 +; RV32I-NEXT: mv a3, s10 +; RV32I-NEXT: beqz a5, .LBB19_37 ; RV32I-NEXT: # %bb.36: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB19_40 +; RV32I-NEXT: mv s5, a6 ; RV32I-NEXT: .LBB19_37: -; RV32I-NEXT: beqz t1, .LBB19_41 -; RV32I-NEXT: .LBB19_38: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: j .LBB19_42 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: li s7, 1 +; RV32I-NEXT: beqz t4, .LBB19_39 +; RV32I-NEXT: # %bb.38: +; RV32I-NEXT: mv a6, s2 ; RV32I-NEXT: .LBB19_39: -; RV32I-NEXT: sw a5, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB19_37 -; RV32I-NEXT: .LBB19_40: -; RV32I-NEXT: mv a3, s2 -; RV32I-NEXT: bnez t1, .LBB19_38 +; RV32I-NEXT: beqz t0, .LBB19_41 +; RV32I-NEXT: # %bb.40: +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: j .LBB19_42 ; RV32I-NEXT: .LBB19_41: -; RV32I-NEXT: or a5, s3, a3 +; RV32I-NEXT: or a6, s4, a6 ; RV32I-NEXT: .LBB19_42: -; RV32I-NEXT: li s2, 1 -; RV32I-NEXT: li s3, 2 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB19_61 +; RV32I-NEXT: li s4, 2 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: bnez t4, .LBB19_66 ; RV32I-NEXT: # %bb.43: -; RV32I-NEXT: beq t1, s2, .LBB19_62 +; RV32I-NEXT: beq t0, s7, .LBB19_67 ; RV32I-NEXT: .LBB19_44: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB19_63 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: bnez t4, .LBB19_68 ; RV32I-NEXT: .LBB19_45: -; RV32I-NEXT: beq t1, s3, .LBB19_64 +; RV32I-NEXT: beq t0, s4, .LBB19_69 ; RV32I-NEXT: .LBB19_46: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB19_65 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: bnez t4, .LBB19_70 ; RV32I-NEXT: .LBB19_47: -; RV32I-NEXT: beq t1, s5, .LBB19_66 +; RV32I-NEXT: li s10, 3 +; RV32I-NEXT: bne t0, s10, .LBB19_49 ; RV32I-NEXT: .LBB19_48: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB19_67 +; RV32I-NEXT: or a6, ra, s2 ; RV32I-NEXT: .LBB19_49: -; RV32I-NEXT: bne t1, s10, .LBB19_51 -; RV32I-NEXT: 
.LBB19_50: -; RV32I-NEXT: or a5, ra, a3 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: li s10, 4 +; RV32I-NEXT: bnez t4, .LBB19_71 +; RV32I-NEXT: # %bb.50: +; RV32I-NEXT: beq t0, s10, .LBB19_72 ; RV32I-NEXT: .LBB19_51: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: li s10, 5 -; RV32I-NEXT: bnez t4, .LBB19_68 -; RV32I-NEXT: # %bb.52: -; RV32I-NEXT: beq t1, s10, .LBB19_69 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: bnez t4, .LBB19_73 +; RV32I-NEXT: .LBB19_52: +; RV32I-NEXT: bne t0, s8, .LBB19_54 ; RV32I-NEXT: .LBB19_53: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB19_70 +; RV32I-NEXT: or a6, s9, s2 ; RV32I-NEXT: .LBB19_54: -; RV32I-NEXT: bne t1, s8, .LBB19_56 -; RV32I-NEXT: .LBB19_55: -; RV32I-NEXT: or a5, s6, a3 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: li s8, 6 +; RV32I-NEXT: beqz t4, .LBB19_56 +; RV32I-NEXT: # %bb.55: +; RV32I-NEXT: mv s2, s3 ; RV32I-NEXT: .LBB19_56: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: li s8, 7 -; RV32I-NEXT: bne t1, s8, .LBB19_71 +; RV32I-NEXT: bne t0, s8, .LBB19_58 ; RV32I-NEXT: # %bb.57: -; RV32I-NEXT: bnez t0, .LBB19_72 +; RV32I-NEXT: or a6, s6, s2 ; RV32I-NEXT: .LBB19_58: -; RV32I-NEXT: li a5, 0 -; RV32I-NEXT: bnez t4, .LBB19_73 -; RV32I-NEXT: .LBB19_59: -; RV32I-NEXT: beqz t1, .LBB19_74 +; RV32I-NEXT: mv s2, t1 +; RV32I-NEXT: li s10, 7 +; RV32I-NEXT: beq t0, s10, .LBB19_60 +; RV32I-NEXT: # %bb.59: +; RV32I-NEXT: mv s2, a6 ; RV32I-NEXT: .LBB19_60: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: j .LBB19_75 -; RV32I-NEXT: .LBB19_61: -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: bne t1, s2, .LBB19_44 +; RV32I-NEXT: beqz a5, .LBB19_62 +; RV32I-NEXT: # %bb.61: +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: .LBB19_62: -; RV32I-NEXT: or a5, s1, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB19_45 -; RV32I-NEXT: .LBB19_63: -; RV32I-NEXT: mv a3, t5 -; RV32I-NEXT: bne t1, s3, .LBB19_46 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: li s10, 4 +; RV32I-NEXT: beqz t4, .LBB19_64 +; RV32I-NEXT: # %bb.63: +; RV32I-NEXT: mv a6, s0 ; RV32I-NEXT: .LBB19_64: -; RV32I-NEXT: or a5, t6, a3 -; 
RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB19_47 -; RV32I-NEXT: .LBB19_65: -; RV32I-NEXT: mv a3, s11 -; RV32I-NEXT: bne t1, s5, .LBB19_48 +; RV32I-NEXT: li s2, 5 +; RV32I-NEXT: beqz t0, .LBB19_74 +; RV32I-NEXT: # %bb.65: +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: j .LBB19_75 ; RV32I-NEXT: .LBB19_66: -; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a5, a5, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB19_49 +; RV32I-NEXT: mv s2, s0 +; RV32I-NEXT: bne t0, s7, .LBB19_44 ; RV32I-NEXT: .LBB19_67: -; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: beq t1, s10, .LBB19_50 -; RV32I-NEXT: j .LBB19_51 +; RV32I-NEXT: or a6, s1, s2 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: beqz t4, .LBB19_45 ; RV32I-NEXT: .LBB19_68: -; RV32I-NEXT: mv a3, s7 -; RV32I-NEXT: bne t1, s10, .LBB19_53 +; RV32I-NEXT: mv s2, t5 +; RV32I-NEXT: bne t0, s4, .LBB19_46 ; RV32I-NEXT: .LBB19_69: -; RV32I-NEXT: or a5, s9, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB19_54 +; RV32I-NEXT: or a6, t6, s2 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: beqz t4, .LBB19_47 ; RV32I-NEXT: .LBB19_70: -; RV32I-NEXT: mv a3, s4 -; RV32I-NEXT: beq t1, s8, .LBB19_55 -; RV32I-NEXT: j .LBB19_56 +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: li s10, 3 +; RV32I-NEXT: beq t0, s10, .LBB19_48 +; RV32I-NEXT: j .LBB19_49 ; RV32I-NEXT: .LBB19_71: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beqz t0, .LBB19_58 +; RV32I-NEXT: lw s2, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t0, s10, .LBB19_51 ; RV32I-NEXT: .LBB19_72: -; RV32I-NEXT: sw a3, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: li a5, 0 -; RV32I-NEXT: beqz t4, .LBB19_59 +; RV32I-NEXT: or a6, s11, s2 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: beqz t4, .LBB19_52 ; RV32I-NEXT: .LBB19_73: -; RV32I-NEXT: mv a5, s0 -; RV32I-NEXT: bnez t1, .LBB19_60 +; RV32I-NEXT: lw s2, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: beq t0, s8, .LBB19_53 +; RV32I-NEXT: j .LBB19_54 ; RV32I-NEXT: .LBB19_74: -; RV32I-NEXT: or a5, s1, a5 +; 
RV32I-NEXT: or a6, s1, a6 ; RV32I-NEXT: .LBB19_75: -; RV32I-NEXT: li s0, 4 -; RV32I-NEXT: li s1, 6 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB19_91 +; RV32I-NEXT: li s1, 3 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t4, .LBB19_95 ; RV32I-NEXT: # %bb.76: -; RV32I-NEXT: beq t1, s2, .LBB19_92 +; RV32I-NEXT: beq t0, s7, .LBB19_96 ; RV32I-NEXT: .LBB19_77: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB19_93 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t4, .LBB19_97 ; RV32I-NEXT: .LBB19_78: -; RV32I-NEXT: beq t1, s3, .LBB19_94 +; RV32I-NEXT: beq t0, s4, .LBB19_98 ; RV32I-NEXT: .LBB19_79: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB19_95 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t4, .LBB19_99 ; RV32I-NEXT: .LBB19_80: -; RV32I-NEXT: beq t1, s5, .LBB19_96 +; RV32I-NEXT: beq t0, s1, .LBB19_100 ; RV32I-NEXT: .LBB19_81: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB19_97 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t4, .LBB19_101 ; RV32I-NEXT: .LBB19_82: -; RV32I-NEXT: beq t1, s0, .LBB19_98 +; RV32I-NEXT: beq t0, s10, .LBB19_102 ; RV32I-NEXT: .LBB19_83: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB19_99 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t4, .LBB19_103 ; RV32I-NEXT: .LBB19_84: -; RV32I-NEXT: beq t1, s10, .LBB19_100 +; RV32I-NEXT: beq t0, s2, .LBB19_104 ; RV32I-NEXT: .LBB19_85: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s1, .LBB19_101 +; RV32I-NEXT: mv s0, t1 +; RV32I-NEXT: beq t0, s8, .LBB19_87 ; RV32I-NEXT: .LBB19_86: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s8, .LBB19_102 +; RV32I-NEXT: mv s0, a6 ; RV32I-NEXT: .LBB19_87: -; RV32I-NEXT: bnez t0, .LBB19_103 -; RV32I-NEXT: .LBB19_88: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB19_104 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: li s7, 7 +; RV32I-NEXT: beq t0, s7, .LBB19_89 +; RV32I-NEXT: # %bb.88: +; RV32I-NEXT: mv a6, s0 ; RV32I-NEXT: .LBB19_89: -; RV32I-NEXT: beqz t1, .LBB19_105 -; RV32I-NEXT: .LBB19_90: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: 
bnez t4, .LBB19_106 -; RV32I-NEXT: j .LBB19_107 +; RV32I-NEXT: beqz a5, .LBB19_91 +; RV32I-NEXT: # %bb.90: +; RV32I-NEXT: mv a4, a6 ; RV32I-NEXT: .LBB19_91: -; RV32I-NEXT: mv a3, t5 -; RV32I-NEXT: bne t1, s2, .LBB19_77 -; RV32I-NEXT: .LBB19_92: -; RV32I-NEXT: or a5, t6, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB19_78 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: li s0, 1 +; RV32I-NEXT: beqz t4, .LBB19_93 +; RV32I-NEXT: # %bb.92: +; RV32I-NEXT: mv a6, t5 ; RV32I-NEXT: .LBB19_93: -; RV32I-NEXT: mv a3, s11 -; RV32I-NEXT: bne t1, s3, .LBB19_79 -; RV32I-NEXT: .LBB19_94: -; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a5, a5, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB19_80 +; RV32I-NEXT: beqz t0, .LBB19_105 +; RV32I-NEXT: # %bb.94: +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: j .LBB19_106 ; RV32I-NEXT: .LBB19_95: -; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne t1, s5, .LBB19_81 +; RV32I-NEXT: mv s0, t5 +; RV32I-NEXT: bne t0, s7, .LBB19_77 ; RV32I-NEXT: .LBB19_96: -; RV32I-NEXT: or a5, ra, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB19_82 +; RV32I-NEXT: or a6, t6, s0 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t4, .LBB19_78 ; RV32I-NEXT: .LBB19_97: -; RV32I-NEXT: mv a3, s7 -; RV32I-NEXT: bne t1, s0, .LBB19_83 +; RV32I-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t0, s4, .LBB19_79 ; RV32I-NEXT: .LBB19_98: -; RV32I-NEXT: or a5, s9, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB19_84 +; RV32I-NEXT: or a6, ra, s0 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t4, .LBB19_80 ; RV32I-NEXT: .LBB19_99: -; RV32I-NEXT: mv a3, s4 -; RV32I-NEXT: bne t1, s10, .LBB19_85 +; RV32I-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t0, s1, .LBB19_81 ; RV32I-NEXT: .LBB19_100: -; RV32I-NEXT: or a5, s6, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s1, .LBB19_86 +; RV32I-NEXT: or a6, s11, s0 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t4, .LBB19_82 ; RV32I-NEXT: .LBB19_101: -; 
RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s8, .LBB19_87 +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t0, s10, .LBB19_83 ; RV32I-NEXT: .LBB19_102: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: beqz t0, .LBB19_88 +; RV32I-NEXT: or a6, s9, s0 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t4, .LBB19_84 ; RV32I-NEXT: .LBB19_103: -; RV32I-NEXT: mv a4, a5 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB19_89 +; RV32I-NEXT: mv s0, s3 +; RV32I-NEXT: bne t0, s2, .LBB19_85 ; RV32I-NEXT: .LBB19_104: -; RV32I-NEXT: mv a3, t5 -; RV32I-NEXT: bnez t1, .LBB19_90 +; RV32I-NEXT: or a6, s6, s0 +; RV32I-NEXT: mv s0, t1 +; RV32I-NEXT: bne t0, s8, .LBB19_86 +; RV32I-NEXT: j .LBB19_87 ; RV32I-NEXT: .LBB19_105: -; RV32I-NEXT: or a5, t6, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB19_107 +; RV32I-NEXT: or a6, t6, a6 ; RV32I-NEXT: .LBB19_106: -; RV32I-NEXT: mv a3, s11 -; RV32I-NEXT: .LBB19_107: -; RV32I-NEXT: beq t1, s2, .LBB19_121 -; RV32I-NEXT: # %bb.108: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB19_122 +; RV32I-NEXT: li t6, 7 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: bnez t4, .LBB19_121 +; RV32I-NEXT: # %bb.107: +; RV32I-NEXT: beq t0, s0, .LBB19_122 +; RV32I-NEXT: .LBB19_108: +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: bnez t4, .LBB19_123 ; RV32I-NEXT: .LBB19_109: -; RV32I-NEXT: beq t1, s3, .LBB19_123 +; RV32I-NEXT: beq t0, s4, .LBB19_124 ; RV32I-NEXT: .LBB19_110: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB19_124 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: bnez t4, .LBB19_125 ; RV32I-NEXT: .LBB19_111: -; RV32I-NEXT: beq t1, s5, .LBB19_125 +; RV32I-NEXT: beq t0, s1, .LBB19_126 ; RV32I-NEXT: .LBB19_112: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB19_126 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: bnez t4, .LBB19_127 ; RV32I-NEXT: .LBB19_113: -; RV32I-NEXT: beq t1, s0, .LBB19_127 +; RV32I-NEXT: beq t0, s10, .LBB19_128 ; RV32I-NEXT: .LBB19_114: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s10, .LBB19_128 +; RV32I-NEXT: mv 
t5, t1 +; RV32I-NEXT: bne t0, s2, .LBB19_129 ; RV32I-NEXT: .LBB19_115: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s1, .LBB19_129 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s8, .LBB19_130 ; RV32I-NEXT: .LBB19_116: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s8, .LBB19_130 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, t6, .LBB19_131 ; RV32I-NEXT: .LBB19_117: -; RV32I-NEXT: bnez t0, .LBB19_131 +; RV32I-NEXT: bnez a5, .LBB19_132 ; RV32I-NEXT: .LBB19_118: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB19_132 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: bnez t4, .LBB19_133 ; RV32I-NEXT: .LBB19_119: -; RV32I-NEXT: beqz t1, .LBB19_133 +; RV32I-NEXT: beqz t0, .LBB19_134 ; RV32I-NEXT: .LBB19_120: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: j .LBB19_134 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: bnez t4, .LBB19_135 +; RV32I-NEXT: j .LBB19_136 ; RV32I-NEXT: .LBB19_121: -; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a5, a5, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB19_109 +; RV32I-NEXT: lw t5, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t0, s0, .LBB19_108 ; RV32I-NEXT: .LBB19_122: -; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne t1, s3, .LBB19_110 +; RV32I-NEXT: or a6, ra, t5 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: beqz t4, .LBB19_109 ; RV32I-NEXT: .LBB19_123: -; RV32I-NEXT: or a5, ra, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB19_111 +; RV32I-NEXT: lw t5, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t0, s4, .LBB19_110 ; RV32I-NEXT: .LBB19_124: -; RV32I-NEXT: mv a3, s7 -; RV32I-NEXT: bne t1, s5, .LBB19_112 +; RV32I-NEXT: or a6, s11, t5 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: beqz t4, .LBB19_111 ; RV32I-NEXT: .LBB19_125: -; RV32I-NEXT: or a5, s9, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB19_113 +; RV32I-NEXT: lw t5, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t0, s1, .LBB19_112 ; RV32I-NEXT: .LBB19_126: -; RV32I-NEXT: mv a3, s4 -; RV32I-NEXT: bne t1, s0, 
.LBB19_114 +; RV32I-NEXT: or a6, s9, t5 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: beqz t4, .LBB19_113 ; RV32I-NEXT: .LBB19_127: -; RV32I-NEXT: or a5, s6, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s10, .LBB19_115 +; RV32I-NEXT: mv t5, s3 +; RV32I-NEXT: bne t0, s10, .LBB19_114 ; RV32I-NEXT: .LBB19_128: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s1, .LBB19_116 +; RV32I-NEXT: or a6, s6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, s2, .LBB19_115 ; RV32I-NEXT: .LBB19_129: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s8, .LBB19_117 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, s8, .LBB19_116 ; RV32I-NEXT: .LBB19_130: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beqz t0, .LBB19_118 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, t6, .LBB19_117 ; RV32I-NEXT: .LBB19_131: -; RV32I-NEXT: mv a6, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB19_119 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: beqz a5, .LBB19_118 ; RV32I-NEXT: .LBB19_132: -; RV32I-NEXT: mv a3, s11 -; RV32I-NEXT: bnez t1, .LBB19_120 +; RV32I-NEXT: mv a7, t5 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: beqz t4, .LBB19_119 ; RV32I-NEXT: .LBB19_133: -; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: lw a6, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: bnez t0, .LBB19_120 ; RV32I-NEXT: .LBB19_134: -; RV32I-NEXT: lw s11, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB19_148 -; RV32I-NEXT: # %bb.135: -; RV32I-NEXT: beq t1, s2, .LBB19_149 +; RV32I-NEXT: or a6, ra, a6 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: beqz t4, .LBB19_136 +; RV32I-NEXT: .LBB19_135: +; RV32I-NEXT: lw t5, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB19_136: -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beq t0, s0, .LBB19_149 +; RV32I-NEXT: # %bb.137: +; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: bnez t4, .LBB19_150 -; RV32I-NEXT: .LBB19_137: -; RV32I-NEXT: beq t1, s3, 
.LBB19_151 ; RV32I-NEXT: .LBB19_138: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB19_152 +; RV32I-NEXT: beq t0, s4, .LBB19_151 ; RV32I-NEXT: .LBB19_139: -; RV32I-NEXT: beq t1, s5, .LBB19_153 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: bnez t4, .LBB19_152 ; RV32I-NEXT: .LBB19_140: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s0, .LBB19_154 +; RV32I-NEXT: beq t0, s1, .LBB19_153 ; RV32I-NEXT: .LBB19_141: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s10, .LBB19_155 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, s10, .LBB19_154 ; RV32I-NEXT: .LBB19_142: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s1, .LBB19_156 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s2, .LBB19_155 ; RV32I-NEXT: .LBB19_143: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s8, .LBB19_157 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, s8, .LBB19_156 ; RV32I-NEXT: .LBB19_144: -; RV32I-NEXT: bnez t0, .LBB19_158 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, t6, .LBB19_157 ; RV32I-NEXT: .LBB19_145: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB19_159 +; RV32I-NEXT: bnez a5, .LBB19_158 ; RV32I-NEXT: .LBB19_146: -; RV32I-NEXT: beqz t1, .LBB19_160 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: bnez t4, .LBB19_159 ; RV32I-NEXT: .LBB19_147: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t0, .LBB19_160 +; RV32I-NEXT: .LBB19_148: +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: bnez t4, .LBB19_161 ; RV32I-NEXT: j .LBB19_162 -; RV32I-NEXT: .LBB19_148: -; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne t1, s2, .LBB19_136 ; RV32I-NEXT: .LBB19_149: -; RV32I-NEXT: or a5, ra, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB19_137 +; RV32I-NEXT: or a6, s11, t5 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: beqz t4, .LBB19_138 ; RV32I-NEXT: .LBB19_150: -; RV32I-NEXT: mv a3, s7 -; RV32I-NEXT: bne t1, s3, .LBB19_138 +; RV32I-NEXT: lw t5, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t0, s4, .LBB19_139 ; RV32I-NEXT: .LBB19_151: -; RV32I-NEXT: or a5, 
s9, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB19_139 +; RV32I-NEXT: or a6, s9, t5 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: beqz t4, .LBB19_140 ; RV32I-NEXT: .LBB19_152: -; RV32I-NEXT: mv a3, s4 -; RV32I-NEXT: bne t1, s5, .LBB19_140 +; RV32I-NEXT: mv t5, s3 +; RV32I-NEXT: bne t0, s1, .LBB19_141 ; RV32I-NEXT: .LBB19_153: -; RV32I-NEXT: or a5, s6, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s0, .LBB19_141 +; RV32I-NEXT: or a6, s6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, s10, .LBB19_142 ; RV32I-NEXT: .LBB19_154: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s10, .LBB19_142 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, s2, .LBB19_143 ; RV32I-NEXT: .LBB19_155: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s1, .LBB19_143 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, s8, .LBB19_144 ; RV32I-NEXT: .LBB19_156: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s8, .LBB19_144 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, t6, .LBB19_145 ; RV32I-NEXT: .LBB19_157: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: beqz t0, .LBB19_145 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: beqz a5, .LBB19_146 ; RV32I-NEXT: .LBB19_158: -; RV32I-NEXT: mv a7, a5 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB19_146 +; RV32I-NEXT: mv t2, a6 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: beqz t4, .LBB19_147 ; RV32I-NEXT: .LBB19_159: -; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: bnez t1, .LBB19_147 +; RV32I-NEXT: lw a6, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: bnez t0, .LBB19_148 ; RV32I-NEXT: .LBB19_160: -; RV32I-NEXT: or a5, ra, a3 -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: or a6, s11, a6 +; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: beqz t4, .LBB19_162 ; RV32I-NEXT: .LBB19_161: -; RV32I-NEXT: mv a3, s7 +; RV32I-NEXT: lw t5, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB19_162: -; RV32I-NEXT: beq t1, s2, .LBB19_174 +; 
RV32I-NEXT: beq t0, s0, .LBB19_174 ; RV32I-NEXT: # %bb.163: -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: bnez t4, .LBB19_175 ; RV32I-NEXT: .LBB19_164: -; RV32I-NEXT: beq t1, s3, .LBB19_176 +; RV32I-NEXT: beq t0, s4, .LBB19_176 ; RV32I-NEXT: .LBB19_165: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s5, .LBB19_177 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, s1, .LBB19_177 ; RV32I-NEXT: .LBB19_166: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s0, .LBB19_178 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s10, .LBB19_178 ; RV32I-NEXT: .LBB19_167: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s10, .LBB19_179 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, s2, .LBB19_179 ; RV32I-NEXT: .LBB19_168: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s1, .LBB19_180 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s8, .LBB19_180 ; RV32I-NEXT: .LBB19_169: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s8, .LBB19_181 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, t6, .LBB19_181 ; RV32I-NEXT: .LBB19_170: -; RV32I-NEXT: bnez t0, .LBB19_182 +; RV32I-NEXT: bnez a5, .LBB19_182 ; RV32I-NEXT: .LBB19_171: -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li a6, 0 ; RV32I-NEXT: bnez t4, .LBB19_183 ; RV32I-NEXT: .LBB19_172: -; RV32I-NEXT: beqz t1, .LBB19_184 +; RV32I-NEXT: beqz t0, .LBB19_184 ; RV32I-NEXT: .LBB19_173: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: bnez t4, .LBB19_185 ; RV32I-NEXT: j .LBB19_186 ; RV32I-NEXT: .LBB19_174: -; RV32I-NEXT: or a5, s9, a3 -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: or a6, s9, t5 +; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: beqz t4, .LBB19_164 ; RV32I-NEXT: .LBB19_175: -; RV32I-NEXT: mv a3, s4 -; RV32I-NEXT: bne t1, s3, .LBB19_165 +; RV32I-NEXT: mv t5, s3 +; RV32I-NEXT: bne t0, s4, .LBB19_165 ; RV32I-NEXT: .LBB19_176: -; RV32I-NEXT: or a5, s6, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s5, .LBB19_166 +; RV32I-NEXT: or a6, s6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq 
t0, s1, .LBB19_166 ; RV32I-NEXT: .LBB19_177: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s0, .LBB19_167 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, s10, .LBB19_167 ; RV32I-NEXT: .LBB19_178: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s10, .LBB19_168 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, s2, .LBB19_168 ; RV32I-NEXT: .LBB19_179: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s1, .LBB19_169 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, s8, .LBB19_169 ; RV32I-NEXT: .LBB19_180: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s8, .LBB19_170 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, t6, .LBB19_170 ; RV32I-NEXT: .LBB19_181: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beqz t0, .LBB19_171 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: beqz a5, .LBB19_171 ; RV32I-NEXT: .LBB19_182: -; RV32I-NEXT: mv a0, a3 -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: mv a0, t5 +; RV32I-NEXT: li a6, 0 ; RV32I-NEXT: beqz t4, .LBB19_172 ; RV32I-NEXT: .LBB19_183: -; RV32I-NEXT: mv a3, s7 -; RV32I-NEXT: bnez t1, .LBB19_173 +; RV32I-NEXT: lw a6, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bnez t0, .LBB19_173 ; RV32I-NEXT: .LBB19_184: -; RV32I-NEXT: or a5, s9, a3 -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: or a6, s9, a6 +; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: beqz t4, .LBB19_186 ; RV32I-NEXT: .LBB19_185: -; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: mv t5, s3 ; RV32I-NEXT: .LBB19_186: -; RV32I-NEXT: beq t1, s2, .LBB19_197 +; RV32I-NEXT: beq t0, s0, .LBB19_197 ; RV32I-NEXT: # %bb.187: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s3, .LBB19_198 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, s4, .LBB19_198 ; RV32I-NEXT: .LBB19_188: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s5, .LBB19_199 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s1, .LBB19_199 ; RV32I-NEXT: .LBB19_189: -; RV32I-NEXT: mv a3, 
t2 -; RV32I-NEXT: bne t1, s0, .LBB19_200 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, s10, .LBB19_200 ; RV32I-NEXT: .LBB19_190: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s10, .LBB19_201 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s2, .LBB19_201 ; RV32I-NEXT: .LBB19_191: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s1, .LBB19_202 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, s8, .LBB19_202 ; RV32I-NEXT: .LBB19_192: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s8, .LBB19_203 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, t6, .LBB19_203 ; RV32I-NEXT: .LBB19_193: -; RV32I-NEXT: bnez t0, .LBB19_204 +; RV32I-NEXT: bnez a5, .LBB19_204 ; RV32I-NEXT: .LBB19_194: -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li a6, 0 ; RV32I-NEXT: bnez t4, .LBB19_205 ; RV32I-NEXT: .LBB19_195: -; RV32I-NEXT: beqz t1, .LBB19_206 +; RV32I-NEXT: beqz t0, .LBB19_206 ; RV32I-NEXT: .LBB19_196: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s2, .LBB19_207 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: mv t4, t1 +; RV32I-NEXT: bne t0, s0, .LBB19_207 ; RV32I-NEXT: j .LBB19_208 ; RV32I-NEXT: .LBB19_197: -; RV32I-NEXT: or a5, s6, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s3, .LBB19_188 +; RV32I-NEXT: or a6, s6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, s4, .LBB19_188 ; RV32I-NEXT: .LBB19_198: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s5, .LBB19_189 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, s1, .LBB19_189 ; RV32I-NEXT: .LBB19_199: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s0, .LBB19_190 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, s10, .LBB19_190 ; RV32I-NEXT: .LBB19_200: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s10, .LBB19_191 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, s2, .LBB19_191 ; RV32I-NEXT: .LBB19_201: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: 
beq t1, s1, .LBB19_192 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, s8, .LBB19_192 ; RV32I-NEXT: .LBB19_202: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s8, .LBB19_193 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, t6, .LBB19_193 ; RV32I-NEXT: .LBB19_203: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: beqz t0, .LBB19_194 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: beqz a5, .LBB19_194 ; RV32I-NEXT: .LBB19_204: -; RV32I-NEXT: mv t3, a5 -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: mv t3, a6 +; RV32I-NEXT: li a6, 0 ; RV32I-NEXT: beqz t4, .LBB19_195 ; RV32I-NEXT: .LBB19_205: -; RV32I-NEXT: mv a3, s4 -; RV32I-NEXT: bnez t1, .LBB19_196 +; RV32I-NEXT: mv a6, s3 +; RV32I-NEXT: bnez t0, .LBB19_196 ; RV32I-NEXT: .LBB19_206: -; RV32I-NEXT: or a3, s6, a3 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s2, .LBB19_208 +; RV32I-NEXT: or a6, s6, a6 +; RV32I-NEXT: mv t4, t1 +; RV32I-NEXT: beq t0, s0, .LBB19_208 ; RV32I-NEXT: .LBB19_207: -; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv t4, a6 ; RV32I-NEXT: .LBB19_208: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s3, .LBB19_217 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s4, .LBB19_217 ; RV32I-NEXT: # %bb.209: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s5, .LBB19_218 +; RV32I-NEXT: mv t4, t1 +; RV32I-NEXT: bne t0, s1, .LBB19_218 ; RV32I-NEXT: .LBB19_210: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s0, .LBB19_219 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s10, .LBB19_219 ; RV32I-NEXT: .LBB19_211: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s10, .LBB19_220 +; RV32I-NEXT: mv t4, t1 +; RV32I-NEXT: bne t0, s2, .LBB19_220 ; RV32I-NEXT: .LBB19_212: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s1, .LBB19_221 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s8, .LBB19_221 ; RV32I-NEXT: .LBB19_213: -; RV32I-NEXT: bne t1, s8, .LBB19_222 +; RV32I-NEXT: bne t0, t6, .LBB19_222 ; RV32I-NEXT: .LBB19_214: -; RV32I-NEXT: beqz t0, .LBB19_216 +; RV32I-NEXT: beqz a5, 
.LBB19_216 ; RV32I-NEXT: .LBB19_215: -; RV32I-NEXT: mv a1, t2 +; RV32I-NEXT: mv a1, t1 ; RV32I-NEXT: .LBB19_216: -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: srli a5, ra, 16 -; RV32I-NEXT: lui t4, 16 -; RV32I-NEXT: srli t2, ra, 24 -; RV32I-NEXT: srli t0, s11, 16 -; RV32I-NEXT: srli t6, s11, 24 -; RV32I-NEXT: srli t1, a4, 16 -; RV32I-NEXT: srli s2, a4, 24 -; RV32I-NEXT: srli t5, a6, 16 -; RV32I-NEXT: srli s3, a6, 24 -; RV32I-NEXT: srli s1, a7, 16 -; RV32I-NEXT: srli a3, a7, 24 -; RV32I-NEXT: srli s0, a0, 16 -; RV32I-NEXT: srli s5, a0, 24 -; RV32I-NEXT: srli s4, t3, 16 -; RV32I-NEXT: srli s6, t3, 24 -; RV32I-NEXT: srli s7, a1, 16 -; RV32I-NEXT: srli s8, a1, 24 -; RV32I-NEXT: addi t4, t4, -1 -; RV32I-NEXT: and s9, ra, t4 -; RV32I-NEXT: and s10, s11, t4 -; RV32I-NEXT: srli s9, s9, 8 -; RV32I-NEXT: sb ra, 0(a2) -; RV32I-NEXT: sb s9, 1(a2) -; RV32I-NEXT: sb a5, 2(a2) -; RV32I-NEXT: sb t2, 3(a2) -; RV32I-NEXT: and a5, a4, t4 -; RV32I-NEXT: srli t2, s10, 8 -; RV32I-NEXT: sb s11, 4(a2) -; RV32I-NEXT: sb t2, 5(a2) -; RV32I-NEXT: sb t0, 6(a2) -; RV32I-NEXT: sb t6, 7(a2) -; RV32I-NEXT: and t0, a6, t4 -; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: lui a5, 16 +; RV32I-NEXT: addi a5, a5, -1 +; RV32I-NEXT: srli a6, s5, 16 +; RV32I-NEXT: and t0, s5, a5 +; RV32I-NEXT: srli t1, s5, 24 +; RV32I-NEXT: srli t0, t0, 8 +; RV32I-NEXT: sb s5, 0(a2) +; RV32I-NEXT: sb t0, 1(a2) +; RV32I-NEXT: sb a6, 2(a2) +; RV32I-NEXT: sb t1, 3(a2) +; RV32I-NEXT: srli a6, a3, 16 +; RV32I-NEXT: and t0, a3, a5 +; RV32I-NEXT: srli t0, t0, 8 +; RV32I-NEXT: srli t1, a3, 24 +; RV32I-NEXT: sb a3, 4(a2) +; RV32I-NEXT: sb t0, 5(a2) +; RV32I-NEXT: sb a6, 6(a2) +; RV32I-NEXT: sb t1, 7(a2) +; RV32I-NEXT: srli a3, a4, 16 +; RV32I-NEXT: and a6, a4, a5 +; RV32I-NEXT: srli a6, a6, 8 +; RV32I-NEXT: srli t0, a4, 24 ; RV32I-NEXT: sb a4, 8(a2) -; RV32I-NEXT: sb a5, 9(a2) -; RV32I-NEXT: sb t1, 10(a2) -; RV32I-NEXT: sb s2, 11(a2) -; RV32I-NEXT: and a4, a7, t4 -; RV32I-NEXT: srli a5, t0, 8 -; RV32I-NEXT: sb a6, 
12(a2) -; RV32I-NEXT: sb a5, 13(a2) -; RV32I-NEXT: sb t5, 14(a2) -; RV32I-NEXT: sb s3, 15(a2) -; RV32I-NEXT: and a5, a0, t4 +; RV32I-NEXT: sb a6, 9(a2) +; RV32I-NEXT: sb a3, 10(a2) +; RV32I-NEXT: sb t0, 11(a2) +; RV32I-NEXT: srli a3, a7, 16 +; RV32I-NEXT: and a4, a7, a5 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a6, a7, 24 +; RV32I-NEXT: sb a7, 12(a2) +; RV32I-NEXT: sb a4, 13(a2) +; RV32I-NEXT: sb a3, 14(a2) +; RV32I-NEXT: sb a6, 15(a2) +; RV32I-NEXT: srli a3, t2, 16 +; RV32I-NEXT: and a4, t2, a5 ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a7, 16(a2) +; RV32I-NEXT: srli a6, t2, 24 +; RV32I-NEXT: sb t2, 16(a2) ; RV32I-NEXT: sb a4, 17(a2) -; RV32I-NEXT: sb s1, 18(a2) -; RV32I-NEXT: sb a3, 19(a2) -; RV32I-NEXT: and a3, t3, t4 -; RV32I-NEXT: and a4, a1, t4 -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: sb a3, 18(a2) +; RV32I-NEXT: sb a6, 19(a2) +; RV32I-NEXT: srli a3, a0, 16 +; RV32I-NEXT: and a4, a0, a5 ; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a6, a0, 24 ; RV32I-NEXT: sb a0, 20(a2) -; RV32I-NEXT: sb a5, 21(a2) -; RV32I-NEXT: sb s0, 22(a2) -; RV32I-NEXT: sb s5, 23(a2) +; RV32I-NEXT: sb a4, 21(a2) +; RV32I-NEXT: sb a3, 22(a2) +; RV32I-NEXT: sb a6, 23(a2) +; RV32I-NEXT: srli a0, t3, 16 +; RV32I-NEXT: and a3, t3, a5 +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: srli a4, t3, 24 ; RV32I-NEXT: sb t3, 24(a2) ; RV32I-NEXT: sb a3, 25(a2) -; RV32I-NEXT: sb s4, 26(a2) -; RV32I-NEXT: sb s6, 27(a2) +; RV32I-NEXT: sb a0, 26(a2) +; RV32I-NEXT: sb a4, 27(a2) +; RV32I-NEXT: srli a0, a1, 16 +; RV32I-NEXT: and a5, a1, a5 +; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: srli a3, a1, 24 ; RV32I-NEXT: sb a1, 28(a2) -; RV32I-NEXT: sb a4, 29(a2) -; RV32I-NEXT: sb s7, 30(a2) -; RV32I-NEXT: sb s8, 31(a2) -; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 60(sp) # 4-byte Folded 
Reload -; RV32I-NEXT: lw s4, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s7, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s10, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s11, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 80 +; RV32I-NEXT: sb a5, 29(a2) +; RV32I-NEXT: sb a0, 30(a2) +; RV32I-NEXT: sb a3, 31(a2) +; RV32I-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s9, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s10, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s11, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB19_217: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s5, .LBB19_210 +; RV32I-NEXT: mv a6, t4 +; RV32I-NEXT: mv t4, t1 +; RV32I-NEXT: beq t0, s1, .LBB19_210 ; RV32I-NEXT: .LBB19_218: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s0, .LBB19_211 +; RV32I-NEXT: mv t4, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, s10, .LBB19_211 ; RV32I-NEXT: .LBB19_219: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s10, .LBB19_212 +; RV32I-NEXT: mv a6, t4 +; RV32I-NEXT: mv t4, t1 +; RV32I-NEXT: beq t0, s2, .LBB19_212 ; RV32I-NEXT: .LBB19_220: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s1, .LBB19_213 +; 
RV32I-NEXT: mv t4, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, s8, .LBB19_213 ; RV32I-NEXT: .LBB19_221: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beq t1, s8, .LBB19_214 +; RV32I-NEXT: mv a6, t4 +; RV32I-NEXT: beq t0, t6, .LBB19_214 ; RV32I-NEXT: .LBB19_222: -; RV32I-NEXT: mv t2, a3 -; RV32I-NEXT: bnez t0, .LBB19_215 +; RV32I-NEXT: mv t1, a6 +; RV32I-NEXT: bnez a5, .LBB19_215 ; RV32I-NEXT: j .LBB19_216 %src = load i256, ptr %src.ptr, align 1 %wordOff = load i256, ptr %wordOff.ptr, align 1 @@ -11905,1201 +11800,1175 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: ashr_32bytes_dwordOff: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -96 -; RV64I-NEXT: sd s0, 88(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 80(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 72(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 64(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s5, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s6, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s7, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s8, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s9, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s10, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s11, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: lbu a4, 1(a0) -; RV64I-NEXT: lbu a5, 2(a0) -; RV64I-NEXT: lbu a6, 3(a0) -; RV64I-NEXT: lbu a7, 4(a0) +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd s0, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a5, 3(a0) +; RV64I-NEXT: lbu a6, 2(a0) +; RV64I-NEXT: lbu a7, 7(a0) ; RV64I-NEXT: lbu t0, 
5(a0) ; RV64I-NEXT: lbu t1, 6(a0) -; RV64I-NEXT: lbu t2, 7(a0) -; RV64I-NEXT: lbu t3, 8(a0) -; RV64I-NEXT: lbu t4, 9(a0) -; RV64I-NEXT: lbu t5, 10(a0) -; RV64I-NEXT: lbu t6, 11(a0) -; RV64I-NEXT: lbu s0, 12(a0) -; RV64I-NEXT: lbu s1, 13(a0) -; RV64I-NEXT: lbu s2, 14(a0) -; RV64I-NEXT: lbu s3, 15(a0) -; RV64I-NEXT: lbu s4, 16(a0) -; RV64I-NEXT: lbu s5, 17(a0) -; RV64I-NEXT: lbu s6, 18(a0) -; RV64I-NEXT: lbu s7, 19(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: lbu t2, 4(a0) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a3, a4 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 8 ; RV64I-NEXT: slli t0, t0, 8 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: lbu s8, 20(a0) -; RV64I-NEXT: lbu s9, 21(a0) -; RV64I-NEXT: lbu s10, 22(a0) -; RV64I-NEXT: lbu s11, 23(a0) -; RV64I-NEXT: slli t2, t2, 8 +; RV64I-NEXT: or a3, a7, t1 +; RV64I-NEXT: lbu a7, 8(a0) +; RV64I-NEXT: lbu t1, 9(a0) +; RV64I-NEXT: lbu t3, 10(a0) +; RV64I-NEXT: lbu t4, 11(a0) +; RV64I-NEXT: lbu t5, 12(a0) +; RV64I-NEXT: lbu t6, 13(a0) +; RV64I-NEXT: lbu s0, 14(a0) +; RV64I-NEXT: lbu s1, 15(a0) +; RV64I-NEXT: or a6, t0, t2 +; RV64I-NEXT: slli a3, a3, 16 +; RV64I-NEXT: or a6, a3, a6 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, a7 ; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: slli t6, t6, 8 +; RV64I-NEXT: or a7, t4, t3 ; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or t1, t2, t1 -; RV64I-NEXT: or a4, t4, t3 -; RV64I-NEXT: or a6, t6, t5 -; RV64I-NEXT: or t0, s1, s0 -; RV64I-NEXT: lbu t5, 24(a0) -; RV64I-NEXT: lbu t6, 25(a0) -; RV64I-NEXT: lbu s0, 26(a0) -; RV64I-NEXT: lbu s1, 27(a0) -; RV64I-NEXT: slli s3, s3, 8 -; RV64I-NEXT: slli s5, s5, 8 -; RV64I-NEXT: slli s7, s7, 8 -; RV64I-NEXT: or t4, s3, s2 -; RV64I-NEXT: or t2, s5, s4 -; RV64I-NEXT: or t3, s7, s6 -; RV64I-NEXT: lbu s2, 28(a0) -; RV64I-NEXT: lbu s3, 29(a0) -; RV64I-NEXT: lbu s4, 30(a0) -; RV64I-NEXT: lbu a0, 31(a0) -; 
RV64I-NEXT: slli s9, s9, 8 -; RV64I-NEXT: slli s11, s11, 8 ; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s5, s9, s8 -; RV64I-NEXT: or s6, s11, s10 -; RV64I-NEXT: or t5, t6, t5 ; RV64I-NEXT: or s0, s1, s0 -; RV64I-NEXT: lbu t6, 0(a1) -; RV64I-NEXT: lbu s1, 1(a1) -; RV64I-NEXT: lbu s7, 2(a1) -; RV64I-NEXT: lbu s8, 3(a1) -; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: lbu a3, 16(a0) +; RV64I-NEXT: lbu t1, 17(a0) +; RV64I-NEXT: lbu t3, 18(a0) +; RV64I-NEXT: lbu t4, 19(a0) +; RV64I-NEXT: lbu s1, 20(a0) +; RV64I-NEXT: lbu s2, 21(a0) +; RV64I-NEXT: lbu s3, 22(a0) +; RV64I-NEXT: lbu s4, 23(a0) +; RV64I-NEXT: or t2, t6, t5 +; RV64I-NEXT: slli s0, s0, 16 +; RV64I-NEXT: or t2, s0, t2 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or a3, t1, a3 +; RV64I-NEXT: slli t4, t4, 8 +; RV64I-NEXT: or t1, t4, t3 +; RV64I-NEXT: slli s4, s4, 8 +; RV64I-NEXT: slli s2, s2, 8 +; RV64I-NEXT: or t3, s4, s3 +; RV64I-NEXT: or t4, s2, s1 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: or t3, t3, t4 +; RV64I-NEXT: lbu t4, 24(a0) +; RV64I-NEXT: lbu t5, 25(a0) +; RV64I-NEXT: lbu t6, 26(a0) +; RV64I-NEXT: lbu s0, 27(a0) +; RV64I-NEXT: or a3, t1, a3 +; RV64I-NEXT: slli t3, t3, 32 +; RV64I-NEXT: lbu t1, 28(a0) +; RV64I-NEXT: lbu s1, 29(a0) +; RV64I-NEXT: lbu s2, 30(a0) +; RV64I-NEXT: lbu a0, 31(a0) +; RV64I-NEXT: or a3, t3, a3 +; RV64I-NEXT: slli s0, s0, 8 +; RV64I-NEXT: slli t5, t5, 8 +; RV64I-NEXT: or t3, s0, t6 +; RV64I-NEXT: or t4, t5, t4 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: or t3, t3, t4 ; RV64I-NEXT: slli a0, a0, 8 ; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s2, s3, s2 -; RV64I-NEXT: or s3, a0, s4 -; RV64I-NEXT: or t6, s1, t6 -; RV64I-NEXT: lbu a0, 4(a1) -; RV64I-NEXT: lbu s1, 5(a1) -; RV64I-NEXT: lbu s4, 6(a1) +; RV64I-NEXT: or a0, a0, s2 +; RV64I-NEXT: or t1, s1, t1 +; RV64I-NEXT: slli a0, a0, 16 +; RV64I-NEXT: lbu t4, 0(a1) +; RV64I-NEXT: lbu t5, 1(a1) +; RV64I-NEXT: lbu t6, 2(a1) +; RV64I-NEXT: lbu s0, 3(a1) +; RV64I-NEXT: lbu 
s1, 4(a1) +; RV64I-NEXT: lbu s2, 5(a1) +; RV64I-NEXT: lbu s3, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli s8, s8, 8 -; RV64I-NEXT: or s7, s8, s7 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s1, s1, a0 +; RV64I-NEXT: or t1, a0, t1 +; RV64I-NEXT: slli t5, t5, 8 +; RV64I-NEXT: or t4, t5, t4 +; RV64I-NEXT: slli s0, s0, 8 +; RV64I-NEXT: or t5, s0, t6 +; RV64I-NEXT: slli s2, s2, 8 +; RV64I-NEXT: or t6, s2, s1 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or s4, a1, s4 -; RV64I-NEXT: slli t1, t1, 16 -; RV64I-NEXT: or a1, t1, a7 -; RV64I-NEXT: slli t4, t4, 16 -; RV64I-NEXT: or a0, t4, t0 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: or a7, t3, t2 -; RV64I-NEXT: slli s6, s6, 16 -; RV64I-NEXT: or t1, s6, s5 +; RV64I-NEXT: or s0, a1, s3 +; RV64I-NEXT: slli a1, a5, 16 +; RV64I-NEXT: slli a0, a7, 16 ; RV64I-NEXT: slli s0, s0, 16 -; RV64I-NEXT: or t4, s0, t5 -; RV64I-NEXT: slli s3, s3, 16 -; RV64I-NEXT: or t5, s3, s2 -; RV64I-NEXT: slli s7, s7, 16 -; RV64I-NEXT: or t6, s7, t6 -; RV64I-NEXT: slli s4, s4, 16 -; RV64I-NEXT: or s0, s4, s1 -; RV64I-NEXT: li t0, 64 -; RV64I-NEXT: slli t3, a5, 16 -; RV64I-NEXT: slli t2, a6, 16 +; RV64I-NEXT: slli t5, t5, 16 +; RV64I-NEXT: or a5, s0, t6 +; RV64I-NEXT: or a7, t5, t4 +; RV64I-NEXT: slli a5, a5, 32 ; RV64I-NEXT: slli t1, t1, 32 -; RV64I-NEXT: slli t5, t5, 32 -; RV64I-NEXT: slli s0, s0, 32 -; RV64I-NEXT: or a7, t1, a7 -; RV64I-NEXT: or a5, t5, t4 -; RV64I-NEXT: or a6, s0, t6 -; RV64I-NEXT: slli a6, a6, 6 -; RV64I-NEXT: sub t1, a6, t0 -; RV64I-NEXT: neg t5, a6 -; RV64I-NEXT: sll t4, a5, t5 -; RV64I-NEXT: bltu a6, t0, .LBB20_2 +; RV64I-NEXT: or a7, a5, a7 +; RV64I-NEXT: or a5, t1, t3 +; RV64I-NEXT: slli a7, a7, 6 +; RV64I-NEXT: li t1, 64 +; RV64I-NEXT: neg t3, a7 +; RV64I-NEXT: sub t4, a7, t1 +; RV64I-NEXT: sll t5, a5, t3 +; RV64I-NEXT: bltu a7, t1, .LBB20_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: sra t6, a5, t1 +; RV64I-NEXT: sra t6, a5, t4 ; RV64I-NEXT: j .LBB20_3 ; RV64I-NEXT: .LBB20_2: -; RV64I-NEXT: srl t6, a7, a6 -; RV64I-NEXT: 
or t6, t6, t4 +; RV64I-NEXT: srl t6, a3, a7 +; RV64I-NEXT: or t6, t6, t5 ; RV64I-NEXT: .LBB20_3: -; RV64I-NEXT: or a3, t3, a3 -; RV64I-NEXT: slli t3, a1, 32 -; RV64I-NEXT: or t2, t2, a4 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: mv a1, a7 -; RV64I-NEXT: beqz a6, .LBB20_5 +; RV64I-NEXT: or a4, a1, a4 +; RV64I-NEXT: slli a6, a6, 32 +; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: slli t2, t2, 32 +; RV64I-NEXT: mv a1, a3 +; RV64I-NEXT: beqz a7, .LBB20_5 ; RV64I-NEXT: # %bb.4: ; RV64I-NEXT: mv a1, t6 ; RV64I-NEXT: .LBB20_5: -; RV64I-NEXT: or a4, t3, a3 -; RV64I-NEXT: or a3, a0, t2 -; RV64I-NEXT: bltu a6, t0, .LBB20_7 +; RV64I-NEXT: or a6, a6, a4 +; RV64I-NEXT: or a4, t2, a0 +; RV64I-NEXT: bltu a7, t1, .LBB20_7 ; RV64I-NEXT: # %bb.6: ; RV64I-NEXT: srai a0, a5, 63 -; RV64I-NEXT: srl t3, a3, t1 +; RV64I-NEXT: srl t4, a4, t4 ; RV64I-NEXT: j .LBB20_8 ; RV64I-NEXT: .LBB20_7: -; RV64I-NEXT: sra a0, a5, a6 -; RV64I-NEXT: srl t1, a4, a6 -; RV64I-NEXT: sll t2, a3, t5 -; RV64I-NEXT: or t3, t1, t2 +; RV64I-NEXT: srl t0, a6, a7 +; RV64I-NEXT: sll t2, a4, t3 +; RV64I-NEXT: sra a0, a5, a7 +; RV64I-NEXT: or t4, t0, t2 ; RV64I-NEXT: .LBB20_8: -; RV64I-NEXT: li t1, 128 -; RV64I-NEXT: mv t2, a4 -; RV64I-NEXT: beqz a6, .LBB20_10 +; RV64I-NEXT: li t0, 128 +; RV64I-NEXT: mv t2, a6 +; RV64I-NEXT: beqz a7, .LBB20_10 ; RV64I-NEXT: # %bb.9: -; RV64I-NEXT: mv t2, t3 +; RV64I-NEXT: mv t2, t4 ; RV64I-NEXT: .LBB20_10: -; RV64I-NEXT: sub t6, t1, a6 -; RV64I-NEXT: bltu a6, t0, .LBB20_13 +; RV64I-NEXT: sub t6, t0, a7 +; RV64I-NEXT: bltu a7, t1, .LBB20_13 ; RV64I-NEXT: # %bb.11: -; RV64I-NEXT: li t3, 0 -; RV64I-NEXT: bgeu t6, t0, .LBB20_14 +; RV64I-NEXT: li t4, 0 +; RV64I-NEXT: bgeu t6, t1, .LBB20_14 ; RV64I-NEXT: .LBB20_12: -; RV64I-NEXT: sll t5, a7, t5 ; RV64I-NEXT: neg s0, t6 -; RV64I-NEXT: srl s0, a7, s0 -; RV64I-NEXT: or s1, s0, t4 +; RV64I-NEXT: srl s0, a3, s0 +; RV64I-NEXT: sll t3, a3, t3 +; RV64I-NEXT: or s1, s0, t5 ; RV64I-NEXT: j .LBB20_15 ; RV64I-NEXT: .LBB20_13: -; RV64I-NEXT: srl t3, a3, a6 
-; RV64I-NEXT: bltu t6, t0, .LBB20_12 +; RV64I-NEXT: srl t4, a4, a7 +; RV64I-NEXT: bltu t6, t1, .LBB20_12 ; RV64I-NEXT: .LBB20_14: -; RV64I-NEXT: li t5, 0 -; RV64I-NEXT: sub t4, t6, t0 -; RV64I-NEXT: sll s1, a7, t4 +; RV64I-NEXT: li t3, 0 +; RV64I-NEXT: sub t5, t6, t1 +; RV64I-NEXT: sll s1, a3, t5 ; RV64I-NEXT: .LBB20_15: -; RV64I-NEXT: sub s0, a6, t1 -; RV64I-NEXT: mv t4, a5 +; RV64I-NEXT: sub s0, a7, t0 +; RV64I-NEXT: mv t5, a5 ; RV64I-NEXT: beqz t6, .LBB20_17 ; RV64I-NEXT: # %bb.16: -; RV64I-NEXT: mv t4, s1 +; RV64I-NEXT: mv t5, s1 ; RV64I-NEXT: .LBB20_17: -; RV64I-NEXT: bltu s0, t0, .LBB20_19 +; RV64I-NEXT: bltu s0, t1, .LBB20_19 ; RV64I-NEXT: # %bb.18: -; RV64I-NEXT: sub t6, s0, t0 +; RV64I-NEXT: sub t6, s0, t1 ; RV64I-NEXT: sra t6, a5, t6 ; RV64I-NEXT: bnez s0, .LBB20_20 ; RV64I-NEXT: j .LBB20_21 ; RV64I-NEXT: .LBB20_19: -; RV64I-NEXT: srl t6, a7, s0 -; RV64I-NEXT: neg s1, s0 -; RV64I-NEXT: sll s1, a5, s1 -; RV64I-NEXT: or t6, t6, s1 +; RV64I-NEXT: neg t6, s0 +; RV64I-NEXT: srl s1, a3, s0 +; RV64I-NEXT: sll t6, a5, t6 +; RV64I-NEXT: or t6, s1, t6 ; RV64I-NEXT: beqz s0, .LBB20_21 ; RV64I-NEXT: .LBB20_20: -; RV64I-NEXT: mv a7, t6 +; RV64I-NEXT: mv a3, t6 ; RV64I-NEXT: .LBB20_21: -; RV64I-NEXT: bltu s0, t0, .LBB20_23 +; RV64I-NEXT: bltu s0, t1, .LBB20_23 ; RV64I-NEXT: # %bb.22: -; RV64I-NEXT: srai t0, a5, 63 -; RV64I-NEXT: bltu a6, t1, .LBB20_24 +; RV64I-NEXT: srai t1, a5, 63 +; RV64I-NEXT: bltu a7, t0, .LBB20_24 ; RV64I-NEXT: j .LBB20_25 ; RV64I-NEXT: .LBB20_23: -; RV64I-NEXT: sra t0, a5, s0 -; RV64I-NEXT: bgeu a6, t1, .LBB20_25 +; RV64I-NEXT: sra t1, a5, s0 +; RV64I-NEXT: bgeu a7, t0, .LBB20_25 ; RV64I-NEXT: .LBB20_24: -; RV64I-NEXT: or a7, t2, t5 -; RV64I-NEXT: or t0, t3, t4 +; RV64I-NEXT: or a3, t2, t3 +; RV64I-NEXT: or t1, t4, t5 ; RV64I-NEXT: .LBB20_25: -; RV64I-NEXT: bnez a6, .LBB20_29 +; RV64I-NEXT: bnez a7, .LBB20_29 ; RV64I-NEXT: # %bb.26: -; RV64I-NEXT: bltu a6, t1, .LBB20_28 +; RV64I-NEXT: bltu a7, t0, .LBB20_28 ; RV64I-NEXT: .LBB20_27: ; RV64I-NEXT: 
srai a1, a5, 63 ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: .LBB20_28: -; RV64I-NEXT: srli a5, a4, 32 -; RV64I-NEXT: srliw a6, a4, 16 -; RV64I-NEXT: lui t2, 16 -; RV64I-NEXT: srliw t1, a4, 24 -; RV64I-NEXT: srli t0, a4, 48 -; RV64I-NEXT: srli t5, a4, 56 -; RV64I-NEXT: srli a7, a3, 32 -; RV64I-NEXT: srliw t4, a3, 16 -; RV64I-NEXT: srliw s0, a3, 24 -; RV64I-NEXT: srli t6, a3, 48 -; RV64I-NEXT: srli s3, a3, 56 -; RV64I-NEXT: srli t3, a1, 32 -; RV64I-NEXT: srliw s2, a1, 16 -; RV64I-NEXT: srliw s6, a1, 24 -; RV64I-NEXT: srli s4, a1, 48 -; RV64I-NEXT: srli s7, a1, 56 -; RV64I-NEXT: srli s1, a0, 32 -; RV64I-NEXT: srliw s5, a0, 16 -; RV64I-NEXT: srliw s8, a0, 24 -; RV64I-NEXT: srli s9, a0, 48 -; RV64I-NEXT: srli s10, a0, 56 -; RV64I-NEXT: addi t2, t2, -1 -; RV64I-NEXT: and s11, a4, t2 -; RV64I-NEXT: srli s11, s11, 8 -; RV64I-NEXT: sb a4, 0(a2) -; RV64I-NEXT: sb s11, 1(a2) -; RV64I-NEXT: sb a6, 2(a2) -; RV64I-NEXT: sb t1, 3(a2) -; RV64I-NEXT: and a4, a5, t2 -; RV64I-NEXT: srli a4, a4, 8 +; RV64I-NEXT: srli a5, a6, 32 +; RV64I-NEXT: lui a3, 16 +; RV64I-NEXT: srliw a7, a6, 16 +; RV64I-NEXT: addi a3, a3, -1 +; RV64I-NEXT: srliw t0, a6, 24 +; RV64I-NEXT: and t1, a6, a3 +; RV64I-NEXT: srli t1, t1, 8 +; RV64I-NEXT: sb a6, 0(a2) +; RV64I-NEXT: sb t1, 1(a2) +; RV64I-NEXT: sb a7, 2(a2) +; RV64I-NEXT: sb t0, 3(a2) +; RV64I-NEXT: and a7, a5, a3 +; RV64I-NEXT: srli t0, a6, 48 +; RV64I-NEXT: srli a7, a7, 8 +; RV64I-NEXT: srli a6, a6, 56 ; RV64I-NEXT: sb a5, 4(a2) -; RV64I-NEXT: sb a4, 5(a2) +; RV64I-NEXT: sb a7, 5(a2) ; RV64I-NEXT: sb t0, 6(a2) -; RV64I-NEXT: sb t5, 7(a2) -; RV64I-NEXT: and a4, a3, t2 -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a3, 8(a2) -; RV64I-NEXT: sb a4, 9(a2) -; RV64I-NEXT: sb t4, 10(a2) -; RV64I-NEXT: sb s0, 11(a2) -; RV64I-NEXT: and a3, a7, t2 -; RV64I-NEXT: srli a3, a3, 8 -; RV64I-NEXT: sb a7, 12(a2) -; RV64I-NEXT: sb a3, 13(a2) -; RV64I-NEXT: sb t6, 14(a2) -; RV64I-NEXT: sb s3, 15(a2) -; RV64I-NEXT: and a3, a1, t2 -; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: sb 
a6, 7(a2) +; RV64I-NEXT: srli a5, a4, 32 +; RV64I-NEXT: srliw a6, a4, 16 +; RV64I-NEXT: and a7, a4, a3 +; RV64I-NEXT: srli a7, a7, 8 +; RV64I-NEXT: srliw t0, a4, 24 +; RV64I-NEXT: sb a4, 8(a2) +; RV64I-NEXT: sb a7, 9(a2) +; RV64I-NEXT: sb a6, 10(a2) +; RV64I-NEXT: sb t0, 11(a2) +; RV64I-NEXT: srli a6, a4, 48 +; RV64I-NEXT: and a7, a5, a3 +; RV64I-NEXT: srli a7, a7, 8 +; RV64I-NEXT: srli a4, a4, 56 +; RV64I-NEXT: sb a5, 12(a2) +; RV64I-NEXT: sb a7, 13(a2) +; RV64I-NEXT: sb a6, 14(a2) +; RV64I-NEXT: sb a4, 15(a2) +; RV64I-NEXT: srli a4, a1, 32 +; RV64I-NEXT: and a5, a1, a3 +; RV64I-NEXT: srliw a6, a1, 16 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srliw a7, a1, 24 ; RV64I-NEXT: sb a1, 16(a2) -; RV64I-NEXT: sb a3, 17(a2) -; RV64I-NEXT: sb s2, 18(a2) -; RV64I-NEXT: sb s6, 19(a2) -; RV64I-NEXT: and a1, t3, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb t3, 20(a2) -; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: sb s4, 22(a2) -; RV64I-NEXT: sb s7, 23(a2) -; RV64I-NEXT: and a1, a0, t2 -; RV64I-NEXT: and a3, s1, t2 -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: sb a5, 17(a2) +; RV64I-NEXT: sb a6, 18(a2) +; RV64I-NEXT: sb a7, 19(a2) +; RV64I-NEXT: and a5, a4, a3 +; RV64I-NEXT: srli a6, a1, 48 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srli a1, a1, 56 +; RV64I-NEXT: sb a4, 20(a2) +; RV64I-NEXT: sb a5, 21(a2) +; RV64I-NEXT: sb a6, 22(a2) +; RV64I-NEXT: sb a1, 23(a2) +; RV64I-NEXT: srli a1, a0, 32 +; RV64I-NEXT: srliw a4, a0, 16 +; RV64I-NEXT: and a5, a0, a3 +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srliw a6, a0, 24 ; RV64I-NEXT: sb a0, 24(a2) -; RV64I-NEXT: sb a1, 25(a2) -; RV64I-NEXT: sb s5, 26(a2) -; RV64I-NEXT: sb s8, 27(a2) -; RV64I-NEXT: sb s1, 28(a2) +; RV64I-NEXT: sb a5, 25(a2) +; RV64I-NEXT: sb a4, 26(a2) +; RV64I-NEXT: sb a6, 27(a2) +; RV64I-NEXT: srli a4, a0, 48 +; RV64I-NEXT: and a3, a1, a3 +; RV64I-NEXT: srli a3, a3, 8 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: sb a1, 28(a2) ; RV64I-NEXT: sb a3, 29(a2) -; RV64I-NEXT: sb s9, 
30(a2) -; RV64I-NEXT: sb s10, 31(a2) -; RV64I-NEXT: ld s0, 88(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 80(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 72(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 64(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 56(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s5, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s6, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s7, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s8, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s9, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s10, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s11, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 96 +; RV64I-NEXT: sb a4, 30(a2) +; RV64I-NEXT: sb a0, 31(a2) +; RV64I-NEXT: ld s0, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB20_29: -; RV64I-NEXT: mv a4, a7 -; RV64I-NEXT: mv a3, t0 -; RV64I-NEXT: bgeu a6, t1, .LBB20_27 +; RV64I-NEXT: mv a6, a3 +; RV64I-NEXT: mv a4, t1 +; RV64I-NEXT: bgeu a7, t0, .LBB20_27 ; RV64I-NEXT: j .LBB20_28 ; ; RV32I-LABEL: ashr_32bytes_dwordOff: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -80 -; RV32I-NEXT: sw ra, 76(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 72(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 68(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 64(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 60(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 52(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 48(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s10, 32(sp) # 4-byte Folded Spill -; 
RV32I-NEXT: sw s11, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: li a5, 0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: lbu a4, 1(a0) -; RV32I-NEXT: lbu t0, 2(a0) -; RV32I-NEXT: lbu t1, 3(a0) -; RV32I-NEXT: lbu t2, 4(a0) -; RV32I-NEXT: lbu t3, 5(a0) -; RV32I-NEXT: lbu t4, 6(a0) -; RV32I-NEXT: lbu t5, 7(a0) -; RV32I-NEXT: lbu t6, 8(a0) -; RV32I-NEXT: lbu s0, 9(a0) -; RV32I-NEXT: lbu s1, 10(a0) -; RV32I-NEXT: lbu s2, 11(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a3 -; RV32I-NEXT: lbu a7, 13(a0) -; RV32I-NEXT: lbu a6, 14(a0) -; RV32I-NEXT: lbu a3, 15(a0) -; RV32I-NEXT: lbu s3, 28(a0) +; RV32I-NEXT: addi sp, sp, -64 +; RV32I-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 52(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 48(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s11, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 3(a0) +; RV32I-NEXT: lbu a5, 0(a0) +; RV32I-NEXT: lbu a7, 2(a0) +; RV32I-NEXT: lbu t0, 4(a0) +; RV32I-NEXT: lbu t1, 5(a0) +; RV32I-NEXT: lbu t2, 6(a0) +; RV32I-NEXT: lbu t3, 7(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: slli t4, a4, 8 +; RV32I-NEXT: or a4, a3, a5 +; RV32I-NEXT: or t6, t4, a7 ; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: lbu a3, 9(a0) ; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: lbu a5, 8(a0) +; RV32I-NEXT: lbu t4, 10(a0) +; RV32I-NEXT: lbu t5, 11(a0) +; RV32I-NEXT: or a7, t1, t0 +; RV32I-NEXT: or t0, t3, t2 +; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t1, a3, 8 +; RV32I-NEXT: or a3, t0, a7 +; 
RV32I-NEXT: or a7, t1, a5 ; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: or t0, t1, t0 -; RV32I-NEXT: or t1, t3, t2 -; RV32I-NEXT: or t4, t5, t4 -; RV32I-NEXT: lbu t2, 29(a0) -; RV32I-NEXT: lbu t3, 30(a0) -; RV32I-NEXT: lbu t5, 31(a0) -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: slli s2, s2, 8 -; RV32I-NEXT: slli s4, t2, 8 -; RV32I-NEXT: or t2, s0, t6 -; RV32I-NEXT: or s0, s2, s1 -; RV32I-NEXT: or s1, s4, s3 -; RV32I-NEXT: lbu t6, 0(a1) -; RV32I-NEXT: lbu s2, 1(a1) -; RV32I-NEXT: lbu s3, 2(a1) +; RV32I-NEXT: lbu t2, 13(a0) +; RV32I-NEXT: lbu t1, 14(a0) +; RV32I-NEXT: lbu a5, 15(a0) +; RV32I-NEXT: lbu t0, 28(a0) +; RV32I-NEXT: lbu t3, 29(a0) +; RV32I-NEXT: lbu s0, 0(a1) +; RV32I-NEXT: lbu s1, 1(a1) +; RV32I-NEXT: lbu s2, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: or s4, t5, t3 -; RV32I-NEXT: slli s2, s2, 8 -; RV32I-NEXT: or s2, s2, t6 +; RV32I-NEXT: or t4, t5, t4 +; RV32I-NEXT: lbu s3, 30(a0) +; RV32I-NEXT: lbu s4, 31(a0) +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t0, t3, t0 +; RV32I-NEXT: slli s1, s1, 8 +; RV32I-NEXT: or s0, s1, s0 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a1, s3 -; RV32I-NEXT: slli t3, a3, 8 -; RV32I-NEXT: slli t6, t0, 16 -; RV32I-NEXT: slli t4, t4, 16 -; RV32I-NEXT: slli t5, s0, 16 -; RV32I-NEXT: slli s4, s4, 16 -; RV32I-NEXT: slli a3, a1, 16 -; RV32I-NEXT: or s5, t4, t1 -; RV32I-NEXT: or a1, s4, s1 -; RV32I-NEXT: or t0, a3, s2 -; RV32I-NEXT: slli t0, t0, 6 -; RV32I-NEXT: srli t1, t0, 5 -; RV32I-NEXT: andi t4, t0, 31 -; RV32I-NEXT: neg a3, t4 +; RV32I-NEXT: or a1, a1, s2 +; RV32I-NEXT: slli t6, t6, 16 +; RV32I-NEXT: slli t3, t4, 16 +; RV32I-NEXT: slli t5, a5, 8 +; RV32I-NEXT: slli s4, s4, 8 +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: or a5, s4, s3 +; RV32I-NEXT: or a1, a1, s0 +; RV32I-NEXT: slli t4, a5, 16 +; RV32I-NEXT: slli a5, a1, 6 +; RV32I-NEXT: or a1, t4, t0 +; RV32I-NEXT: andi t4, a5, 31 +; RV32I-NEXT: srli t0, a5, 5 +; RV32I-NEXT: neg s3, t4 ; RV32I-NEXT: beqz t4, .LBB20_2 ; RV32I-NEXT: # 
%bb.1: -; RV32I-NEXT: sll a5, s5, a3 +; RV32I-NEXT: sll a6, a3, s3 ; RV32I-NEXT: .LBB20_2: -; RV32I-NEXT: or s10, t6, a4 -; RV32I-NEXT: lbu t6, 12(a0) -; RV32I-NEXT: lbu s0, 19(a0) -; RV32I-NEXT: slli s1, a7, 8 -; RV32I-NEXT: or a6, t3, a6 -; RV32I-NEXT: or a4, t5, t2 -; RV32I-NEXT: srai t2, a1, 31 -; RV32I-NEXT: beqz t1, .LBB20_4 +; RV32I-NEXT: lbu s0, 12(a0) +; RV32I-NEXT: lbu s1, 19(a0) +; RV32I-NEXT: or s5, t6, a4 +; RV32I-NEXT: slli t6, t2, 8 +; RV32I-NEXT: or t5, t5, t1 +; RV32I-NEXT: or a4, t3, a7 +; RV32I-NEXT: srai t1, a1, 31 +; RV32I-NEXT: beqz t0, .LBB20_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: mv a6, t1 ; RV32I-NEXT: j .LBB20_5 ; RV32I-NEXT: .LBB20_4: -; RV32I-NEXT: srl a7, s10, t0 -; RV32I-NEXT: or a5, a7, a5 +; RV32I-NEXT: srl a7, s5, a5 +; RV32I-NEXT: or a6, a7, a6 ; RV32I-NEXT: .LBB20_5: -; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: lbu s3, 17(a0) -; RV32I-NEXT: lbu t3, 18(a0) -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: or s4, s1, t6 -; RV32I-NEXT: slli a6, a6, 16 -; RV32I-NEXT: li s6, 1 -; RV32I-NEXT: sll s2, a4, a3 +; RV32I-NEXT: li t2, 0 +; RV32I-NEXT: lbu t3, 17(a0) +; RV32I-NEXT: lbu a7, 18(a0) +; RV32I-NEXT: slli s4, s1, 8 +; RV32I-NEXT: or s6, t6, s0 +; RV32I-NEXT: slli s7, t5, 16 +; RV32I-NEXT: li s8, 1 +; RV32I-NEXT: sll s2, a4, s3 ; RV32I-NEXT: beqz t4, .LBB20_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: mv a7, s2 +; RV32I-NEXT: mv t2, s2 ; RV32I-NEXT: .LBB20_7: ; RV32I-NEXT: lbu t5, 16(a0) ; RV32I-NEXT: lbu t6, 23(a0) -; RV32I-NEXT: slli s1, s3, 8 -; RV32I-NEXT: or s0, s0, t3 -; RV32I-NEXT: srl s3, s5, t0 -; RV32I-NEXT: or a6, a6, s4 -; RV32I-NEXT: bne t1, s6, .LBB20_9 +; RV32I-NEXT: slli s1, t3, 8 +; RV32I-NEXT: or s0, s4, a7 +; RV32I-NEXT: srl s4, a3, a5 +; RV32I-NEXT: or a7, s7, s6 +; RV32I-NEXT: bne t0, s8, .LBB20_9 ; RV32I-NEXT: # %bb.8: -; RV32I-NEXT: or a5, s3, a7 +; RV32I-NEXT: or a6, s4, t2 ; RV32I-NEXT: .LBB20_9: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: lbu s6, 21(a0) -; RV32I-NEXT: lbu a7, 22(a0) -; RV32I-NEXT: slli 
s4, t6, 8 -; RV32I-NEXT: or s7, s1, t5 -; RV32I-NEXT: slli s8, s0, 16 -; RV32I-NEXT: li s9, 2 -; RV32I-NEXT: sll s0, a6, a3 +; RV32I-NEXT: lbu s7, 21(a0) +; RV32I-NEXT: lbu t2, 22(a0) +; RV32I-NEXT: slli s6, t6, 8 +; RV32I-NEXT: or s8, s1, t5 +; RV32I-NEXT: slli s9, s0, 16 +; RV32I-NEXT: li s10, 2 +; RV32I-NEXT: sll s0, a7, s3 ; RV32I-NEXT: beqz t4, .LBB20_11 ; RV32I-NEXT: # %bb.10: ; RV32I-NEXT: mv t3, s0 ; RV32I-NEXT: .LBB20_11: ; RV32I-NEXT: lbu t5, 20(a0) ; RV32I-NEXT: lbu t6, 27(a0) -; RV32I-NEXT: slli s6, s6, 8 -; RV32I-NEXT: or s4, s4, a7 -; RV32I-NEXT: srl s1, a4, t0 -; RV32I-NEXT: or a7, s8, s7 -; RV32I-NEXT: bne t1, s9, .LBB20_13 +; RV32I-NEXT: slli s7, s7, 8 +; RV32I-NEXT: or s6, s6, t2 +; RV32I-NEXT: srl s1, a4, a5 +; RV32I-NEXT: or t2, s9, s8 +; RV32I-NEXT: bne t0, s10, .LBB20_13 ; RV32I-NEXT: # %bb.12: -; RV32I-NEXT: or a5, s1, t3 +; RV32I-NEXT: or a6, s1, t3 ; RV32I-NEXT: .LBB20_13: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: lbu s8, 25(a0) -; RV32I-NEXT: lbu s7, 26(a0) +; RV32I-NEXT: lbu s9, 25(a0) +; RV32I-NEXT: lbu s8, 26(a0) ; RV32I-NEXT: slli t6, t6, 8 -; RV32I-NEXT: or s6, s6, t5 -; RV32I-NEXT: slli s9, s4, 16 -; RV32I-NEXT: li s11, 3 -; RV32I-NEXT: sll t5, a7, a3 +; RV32I-NEXT: or s7, s7, t5 +; RV32I-NEXT: slli s10, s6, 16 +; RV32I-NEXT: sll t5, t2, s3 ; RV32I-NEXT: beqz t4, .LBB20_15 ; RV32I-NEXT: # %bb.14: ; RV32I-NEXT: mv t3, t5 ; RV32I-NEXT: .LBB20_15: -; RV32I-NEXT: lbu s4, 24(a0) -; RV32I-NEXT: slli s8, s8, 8 -; RV32I-NEXT: or s7, t6, s7 -; RV32I-NEXT: srl t6, a6, t0 -; RV32I-NEXT: or a0, s9, s6 -; RV32I-NEXT: sw s5, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s10, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: bne t1, s11, .LBB20_17 +; RV32I-NEXT: lbu s6, 24(a0) +; RV32I-NEXT: slli s9, s9, 8 +; RV32I-NEXT: or s8, t6, s8 +; RV32I-NEXT: srl t6, a7, a5 +; RV32I-NEXT: or a0, s10, s7 +; RV32I-NEXT: li s7, 3 +; RV32I-NEXT: bne t0, s7, .LBB20_17 ; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: or a5, t6, t3 +; RV32I-NEXT: or a6, t6, t3 ; RV32I-NEXT: 
.LBB20_17: -; RV32I-NEXT: li s6, 0 -; RV32I-NEXT: or t3, s8, s4 -; RV32I-NEXT: slli s7, s7, 16 -; RV32I-NEXT: li s10, 4 -; RV32I-NEXT: sll s11, a0, a3 +; RV32I-NEXT: li s7, 0 +; RV32I-NEXT: or t3, s9, s6 +; RV32I-NEXT: slli s8, s8, 16 +; RV32I-NEXT: li s6, 4 +; RV32I-NEXT: sll s9, a0, s3 +; RV32I-NEXT: sw s9, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: beqz t4, .LBB20_19 ; RV32I-NEXT: # %bb.18: -; RV32I-NEXT: mv s6, s11 +; RV32I-NEXT: lw s7, 0(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB20_19: -; RV32I-NEXT: srl s4, a7, t0 -; RV32I-NEXT: or t3, s7, t3 -; RV32I-NEXT: sw s4, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: bne t1, s10, .LBB20_21 +; RV32I-NEXT: srl ra, t2, a5 +; RV32I-NEXT: or t3, s8, t3 +; RV32I-NEXT: bne t0, s6, .LBB20_21 ; RV32I-NEXT: # %bb.20: -; RV32I-NEXT: or a5, s4, s6 +; RV32I-NEXT: or a6, ra, s7 ; RV32I-NEXT: .LBB20_21: -; RV32I-NEXT: li s4, 0 -; RV32I-NEXT: li s5, 5 -; RV32I-NEXT: sll s6, t3, a3 -; RV32I-NEXT: sw s6, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: li s6, 0 +; RV32I-NEXT: li s8, 5 +; RV32I-NEXT: sll s7, t3, s3 +; RV32I-NEXT: sw s7, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: beqz t4, .LBB20_23 ; RV32I-NEXT: # %bb.22: -; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB20_23: -; RV32I-NEXT: srl s6, a0, t0 -; RV32I-NEXT: beq t1, s5, .LBB20_25 +; RV32I-NEXT: srl s7, a0, a5 +; RV32I-NEXT: beq t0, s8, .LBB20_25 ; RV32I-NEXT: # %bb.24: -; RV32I-NEXT: mv ra, s6 +; RV32I-NEXT: mv s11, s7 ; RV32I-NEXT: j .LBB20_26 ; RV32I-NEXT: .LBB20_25: -; RV32I-NEXT: mv ra, s6 -; RV32I-NEXT: or a5, s6, s4 +; RV32I-NEXT: mv s11, s7 +; RV32I-NEXT: or a6, s7, s6 ; RV32I-NEXT: .LBB20_26: -; RV32I-NEXT: li s4, 0 -; RV32I-NEXT: li s8, 6 -; RV32I-NEXT: sll s7, a1, a3 +; RV32I-NEXT: li s6, 0 +; RV32I-NEXT: li s9, 6 +; RV32I-NEXT: sll s10, a1, s3 +; RV32I-NEXT: sw s10, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: beqz t4, .LBB20_28 ; RV32I-NEXT: # %bb.27: -; RV32I-NEXT: mv s4, s7 +; RV32I-NEXT: lw s6, 
8(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB20_28: -; RV32I-NEXT: srl s5, t3, t0 -; RV32I-NEXT: beq t1, s8, .LBB20_30 +; RV32I-NEXT: mv s10, a3 +; RV32I-NEXT: srl s7, t3, a5 +; RV32I-NEXT: beq t0, s9, .LBB20_30 ; RV32I-NEXT: # %bb.29: -; RV32I-NEXT: mv s9, s5 +; RV32I-NEXT: mv s9, s7 +; RV32I-NEXT: mv a3, s5 ; RV32I-NEXT: j .LBB20_31 ; RV32I-NEXT: .LBB20_30: -; RV32I-NEXT: mv s9, s5 -; RV32I-NEXT: or a5, s5, s4 +; RV32I-NEXT: mv a3, s5 +; RV32I-NEXT: mv s9, s7 +; RV32I-NEXT: or a6, s7, s6 ; RV32I-NEXT: .LBB20_31: ; RV32I-NEXT: li s5, 0 -; RV32I-NEXT: li s6, 7 -; RV32I-NEXT: sll s4, t2, a3 +; RV32I-NEXT: sll s3, t1, s3 ; RV32I-NEXT: beqz t4, .LBB20_33 ; RV32I-NEXT: # %bb.32: -; RV32I-NEXT: mv s5, s4 +; RV32I-NEXT: mv s5, s3 ; RV32I-NEXT: .LBB20_33: -; RV32I-NEXT: srl a3, a1, t0 -; RV32I-NEXT: bne t1, s6, .LBB20_35 +; RV32I-NEXT: srl s6, a1, a5 +; RV32I-NEXT: li s7, 7 +; RV32I-NEXT: bne t0, s7, .LBB20_35 ; RV32I-NEXT: # %bb.34: -; RV32I-NEXT: or a5, a3, s5 +; RV32I-NEXT: or a6, s6, s5 ; RV32I-NEXT: .LBB20_35: -; RV32I-NEXT: li s5, 3 -; RV32I-NEXT: mv s6, a3 -; RV32I-NEXT: bnez t0, .LBB20_39 +; RV32I-NEXT: mv s5, a3 +; RV32I-NEXT: mv a3, s10 +; RV32I-NEXT: beqz a5, .LBB20_37 ; RV32I-NEXT: # %bb.36: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB20_40 +; RV32I-NEXT: mv s5, a6 ; RV32I-NEXT: .LBB20_37: -; RV32I-NEXT: beqz t1, .LBB20_41 -; RV32I-NEXT: .LBB20_38: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: j .LBB20_42 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: li s7, 1 +; RV32I-NEXT: beqz t4, .LBB20_39 +; RV32I-NEXT: # %bb.38: +; RV32I-NEXT: mv a6, s2 ; RV32I-NEXT: .LBB20_39: -; RV32I-NEXT: sw a5, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB20_37 -; RV32I-NEXT: .LBB20_40: -; RV32I-NEXT: mv a3, s2 -; RV32I-NEXT: bnez t1, .LBB20_38 +; RV32I-NEXT: beqz t0, .LBB20_41 +; RV32I-NEXT: # %bb.40: +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: j .LBB20_42 ; RV32I-NEXT: .LBB20_41: -; RV32I-NEXT: or a5, s3, a3 +; RV32I-NEXT: or a6, s4, a6 ; RV32I-NEXT: 
.LBB20_42: -; RV32I-NEXT: li s2, 1 -; RV32I-NEXT: li s3, 2 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB20_61 +; RV32I-NEXT: li s4, 2 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: bnez t4, .LBB20_66 ; RV32I-NEXT: # %bb.43: -; RV32I-NEXT: beq t1, s2, .LBB20_62 +; RV32I-NEXT: beq t0, s7, .LBB20_67 ; RV32I-NEXT: .LBB20_44: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB20_63 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: bnez t4, .LBB20_68 ; RV32I-NEXT: .LBB20_45: -; RV32I-NEXT: beq t1, s3, .LBB20_64 +; RV32I-NEXT: beq t0, s4, .LBB20_69 ; RV32I-NEXT: .LBB20_46: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB20_65 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: bnez t4, .LBB20_70 ; RV32I-NEXT: .LBB20_47: -; RV32I-NEXT: beq t1, s5, .LBB20_66 +; RV32I-NEXT: li s10, 3 +; RV32I-NEXT: bne t0, s10, .LBB20_49 ; RV32I-NEXT: .LBB20_48: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB20_67 +; RV32I-NEXT: or a6, ra, s2 ; RV32I-NEXT: .LBB20_49: -; RV32I-NEXT: bne t1, s10, .LBB20_51 -; RV32I-NEXT: .LBB20_50: -; RV32I-NEXT: or a5, ra, a3 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: li s10, 4 +; RV32I-NEXT: bnez t4, .LBB20_71 +; RV32I-NEXT: # %bb.50: +; RV32I-NEXT: beq t0, s10, .LBB20_72 ; RV32I-NEXT: .LBB20_51: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: li s10, 5 -; RV32I-NEXT: bnez t4, .LBB20_68 -; RV32I-NEXT: # %bb.52: -; RV32I-NEXT: beq t1, s10, .LBB20_69 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: bnez t4, .LBB20_73 +; RV32I-NEXT: .LBB20_52: +; RV32I-NEXT: bne t0, s8, .LBB20_54 ; RV32I-NEXT: .LBB20_53: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB20_70 +; RV32I-NEXT: or a6, s9, s2 ; RV32I-NEXT: .LBB20_54: -; RV32I-NEXT: bne t1, s8, .LBB20_56 -; RV32I-NEXT: .LBB20_55: -; RV32I-NEXT: or a5, s6, a3 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: li s8, 6 +; RV32I-NEXT: beqz t4, .LBB20_56 +; RV32I-NEXT: # %bb.55: +; RV32I-NEXT: mv s2, s3 ; RV32I-NEXT: .LBB20_56: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: li s8, 7 -; RV32I-NEXT: bne t1, s8, .LBB20_71 +; RV32I-NEXT: bne t0, s8, .LBB20_58 ; RV32I-NEXT: # 
%bb.57: -; RV32I-NEXT: bnez t0, .LBB20_72 +; RV32I-NEXT: or a6, s6, s2 ; RV32I-NEXT: .LBB20_58: -; RV32I-NEXT: li a5, 0 -; RV32I-NEXT: bnez t4, .LBB20_73 -; RV32I-NEXT: .LBB20_59: -; RV32I-NEXT: beqz t1, .LBB20_74 +; RV32I-NEXT: mv s2, t1 +; RV32I-NEXT: li s10, 7 +; RV32I-NEXT: beq t0, s10, .LBB20_60 +; RV32I-NEXT: # %bb.59: +; RV32I-NEXT: mv s2, a6 ; RV32I-NEXT: .LBB20_60: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: j .LBB20_75 -; RV32I-NEXT: .LBB20_61: -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: bne t1, s2, .LBB20_44 +; RV32I-NEXT: beqz a5, .LBB20_62 +; RV32I-NEXT: # %bb.61: +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: .LBB20_62: -; RV32I-NEXT: or a5, s1, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB20_45 -; RV32I-NEXT: .LBB20_63: -; RV32I-NEXT: mv a3, t5 -; RV32I-NEXT: bne t1, s3, .LBB20_46 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: li s10, 4 +; RV32I-NEXT: beqz t4, .LBB20_64 +; RV32I-NEXT: # %bb.63: +; RV32I-NEXT: mv a6, s0 ; RV32I-NEXT: .LBB20_64: -; RV32I-NEXT: or a5, t6, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB20_47 -; RV32I-NEXT: .LBB20_65: -; RV32I-NEXT: mv a3, s11 -; RV32I-NEXT: bne t1, s5, .LBB20_48 +; RV32I-NEXT: li s2, 5 +; RV32I-NEXT: beqz t0, .LBB20_74 +; RV32I-NEXT: # %bb.65: +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: j .LBB20_75 ; RV32I-NEXT: .LBB20_66: -; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a5, a5, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB20_49 +; RV32I-NEXT: mv s2, s0 +; RV32I-NEXT: bne t0, s7, .LBB20_44 ; RV32I-NEXT: .LBB20_67: -; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: beq t1, s10, .LBB20_50 -; RV32I-NEXT: j .LBB20_51 +; RV32I-NEXT: or a6, s1, s2 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: beqz t4, .LBB20_45 ; RV32I-NEXT: .LBB20_68: -; RV32I-NEXT: mv a3, s7 -; RV32I-NEXT: bne t1, s10, .LBB20_53 +; RV32I-NEXT: mv s2, t5 +; RV32I-NEXT: bne t0, s4, .LBB20_46 ; RV32I-NEXT: .LBB20_69: -; RV32I-NEXT: or a5, s9, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB20_54 +; 
RV32I-NEXT: or a6, t6, s2 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: beqz t4, .LBB20_47 ; RV32I-NEXT: .LBB20_70: -; RV32I-NEXT: mv a3, s4 -; RV32I-NEXT: beq t1, s8, .LBB20_55 -; RV32I-NEXT: j .LBB20_56 +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: li s10, 3 +; RV32I-NEXT: beq t0, s10, .LBB20_48 +; RV32I-NEXT: j .LBB20_49 ; RV32I-NEXT: .LBB20_71: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beqz t0, .LBB20_58 +; RV32I-NEXT: lw s2, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t0, s10, .LBB20_51 ; RV32I-NEXT: .LBB20_72: -; RV32I-NEXT: sw a3, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: li a5, 0 -; RV32I-NEXT: beqz t4, .LBB20_59 +; RV32I-NEXT: or a6, s11, s2 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: beqz t4, .LBB20_52 ; RV32I-NEXT: .LBB20_73: -; RV32I-NEXT: mv a5, s0 -; RV32I-NEXT: bnez t1, .LBB20_60 +; RV32I-NEXT: lw s2, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: beq t0, s8, .LBB20_53 +; RV32I-NEXT: j .LBB20_54 ; RV32I-NEXT: .LBB20_74: -; RV32I-NEXT: or a5, s1, a5 +; RV32I-NEXT: or a6, s1, a6 ; RV32I-NEXT: .LBB20_75: -; RV32I-NEXT: li s0, 4 -; RV32I-NEXT: li s1, 6 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB20_91 +; RV32I-NEXT: li s1, 3 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t4, .LBB20_95 ; RV32I-NEXT: # %bb.76: -; RV32I-NEXT: beq t1, s2, .LBB20_92 +; RV32I-NEXT: beq t0, s7, .LBB20_96 ; RV32I-NEXT: .LBB20_77: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB20_93 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t4, .LBB20_97 ; RV32I-NEXT: .LBB20_78: -; RV32I-NEXT: beq t1, s3, .LBB20_94 +; RV32I-NEXT: beq t0, s4, .LBB20_98 ; RV32I-NEXT: .LBB20_79: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB20_95 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t4, .LBB20_99 ; RV32I-NEXT: .LBB20_80: -; RV32I-NEXT: beq t1, s5, .LBB20_96 +; RV32I-NEXT: beq t0, s1, .LBB20_100 ; RV32I-NEXT: .LBB20_81: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB20_97 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t4, .LBB20_101 ; RV32I-NEXT: .LBB20_82: -; RV32I-NEXT: beq t1, 
s0, .LBB20_98 +; RV32I-NEXT: beq t0, s10, .LBB20_102 ; RV32I-NEXT: .LBB20_83: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB20_99 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t4, .LBB20_103 ; RV32I-NEXT: .LBB20_84: -; RV32I-NEXT: beq t1, s10, .LBB20_100 +; RV32I-NEXT: beq t0, s2, .LBB20_104 ; RV32I-NEXT: .LBB20_85: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s1, .LBB20_101 +; RV32I-NEXT: mv s0, t1 +; RV32I-NEXT: beq t0, s8, .LBB20_87 ; RV32I-NEXT: .LBB20_86: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s8, .LBB20_102 +; RV32I-NEXT: mv s0, a6 ; RV32I-NEXT: .LBB20_87: -; RV32I-NEXT: bnez t0, .LBB20_103 -; RV32I-NEXT: .LBB20_88: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB20_104 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: li s7, 7 +; RV32I-NEXT: beq t0, s7, .LBB20_89 +; RV32I-NEXT: # %bb.88: +; RV32I-NEXT: mv a6, s0 ; RV32I-NEXT: .LBB20_89: -; RV32I-NEXT: beqz t1, .LBB20_105 -; RV32I-NEXT: .LBB20_90: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB20_106 -; RV32I-NEXT: j .LBB20_107 +; RV32I-NEXT: beqz a5, .LBB20_91 +; RV32I-NEXT: # %bb.90: +; RV32I-NEXT: mv a4, a6 ; RV32I-NEXT: .LBB20_91: -; RV32I-NEXT: mv a3, t5 -; RV32I-NEXT: bne t1, s2, .LBB20_77 -; RV32I-NEXT: .LBB20_92: -; RV32I-NEXT: or a5, t6, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB20_78 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: li s0, 1 +; RV32I-NEXT: beqz t4, .LBB20_93 +; RV32I-NEXT: # %bb.92: +; RV32I-NEXT: mv a6, t5 ; RV32I-NEXT: .LBB20_93: -; RV32I-NEXT: mv a3, s11 -; RV32I-NEXT: bne t1, s3, .LBB20_79 -; RV32I-NEXT: .LBB20_94: -; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a5, a5, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB20_80 +; RV32I-NEXT: beqz t0, .LBB20_105 +; RV32I-NEXT: # %bb.94: +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: j .LBB20_106 ; RV32I-NEXT: .LBB20_95: -; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne t1, s5, .LBB20_81 +; RV32I-NEXT: mv s0, t5 +; RV32I-NEXT: bne t0, s7, .LBB20_77 ; 
RV32I-NEXT: .LBB20_96: -; RV32I-NEXT: or a5, ra, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB20_82 +; RV32I-NEXT: or a6, t6, s0 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t4, .LBB20_78 ; RV32I-NEXT: .LBB20_97: -; RV32I-NEXT: mv a3, s7 -; RV32I-NEXT: bne t1, s0, .LBB20_83 +; RV32I-NEXT: lw s0, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t0, s4, .LBB20_79 ; RV32I-NEXT: .LBB20_98: -; RV32I-NEXT: or a5, s9, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB20_84 +; RV32I-NEXT: or a6, ra, s0 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t4, .LBB20_80 ; RV32I-NEXT: .LBB20_99: -; RV32I-NEXT: mv a3, s4 -; RV32I-NEXT: bne t1, s10, .LBB20_85 +; RV32I-NEXT: lw s0, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t0, s1, .LBB20_81 ; RV32I-NEXT: .LBB20_100: -; RV32I-NEXT: or a5, s6, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s1, .LBB20_86 +; RV32I-NEXT: or a6, s11, s0 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t4, .LBB20_82 ; RV32I-NEXT: .LBB20_101: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s8, .LBB20_87 +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t0, s10, .LBB20_83 ; RV32I-NEXT: .LBB20_102: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: beqz t0, .LBB20_88 +; RV32I-NEXT: or a6, s9, s0 +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t4, .LBB20_84 ; RV32I-NEXT: .LBB20_103: -; RV32I-NEXT: mv a4, a5 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB20_89 +; RV32I-NEXT: mv s0, s3 +; RV32I-NEXT: bne t0, s2, .LBB20_85 ; RV32I-NEXT: .LBB20_104: -; RV32I-NEXT: mv a3, t5 -; RV32I-NEXT: bnez t1, .LBB20_90 +; RV32I-NEXT: or a6, s6, s0 +; RV32I-NEXT: mv s0, t1 +; RV32I-NEXT: bne t0, s8, .LBB20_86 +; RV32I-NEXT: j .LBB20_87 ; RV32I-NEXT: .LBB20_105: -; RV32I-NEXT: or a5, t6, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB20_107 +; RV32I-NEXT: or a6, t6, a6 ; RV32I-NEXT: .LBB20_106: -; RV32I-NEXT: mv a3, s11 -; RV32I-NEXT: .LBB20_107: -; RV32I-NEXT: beq t1, s2, .LBB20_121 -; RV32I-NEXT: # %bb.108: -; 
RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB20_122 +; RV32I-NEXT: li t6, 7 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: bnez t4, .LBB20_121 +; RV32I-NEXT: # %bb.107: +; RV32I-NEXT: beq t0, s0, .LBB20_122 +; RV32I-NEXT: .LBB20_108: +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: bnez t4, .LBB20_123 ; RV32I-NEXT: .LBB20_109: -; RV32I-NEXT: beq t1, s3, .LBB20_123 +; RV32I-NEXT: beq t0, s4, .LBB20_124 ; RV32I-NEXT: .LBB20_110: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB20_124 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: bnez t4, .LBB20_125 ; RV32I-NEXT: .LBB20_111: -; RV32I-NEXT: beq t1, s5, .LBB20_125 +; RV32I-NEXT: beq t0, s1, .LBB20_126 ; RV32I-NEXT: .LBB20_112: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB20_126 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: bnez t4, .LBB20_127 ; RV32I-NEXT: .LBB20_113: -; RV32I-NEXT: beq t1, s0, .LBB20_127 +; RV32I-NEXT: beq t0, s10, .LBB20_128 ; RV32I-NEXT: .LBB20_114: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s10, .LBB20_128 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, s2, .LBB20_129 ; RV32I-NEXT: .LBB20_115: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s1, .LBB20_129 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s8, .LBB20_130 ; RV32I-NEXT: .LBB20_116: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s8, .LBB20_130 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, t6, .LBB20_131 ; RV32I-NEXT: .LBB20_117: -; RV32I-NEXT: bnez t0, .LBB20_131 +; RV32I-NEXT: bnez a5, .LBB20_132 ; RV32I-NEXT: .LBB20_118: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB20_132 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: bnez t4, .LBB20_133 ; RV32I-NEXT: .LBB20_119: -; RV32I-NEXT: beqz t1, .LBB20_133 +; RV32I-NEXT: beqz t0, .LBB20_134 ; RV32I-NEXT: .LBB20_120: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: j .LBB20_134 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: bnez t4, .LBB20_135 +; RV32I-NEXT: j .LBB20_136 ; RV32I-NEXT: .LBB20_121: -; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a5, a5, a3 -; RV32I-NEXT: li a3, 0 
-; RV32I-NEXT: beqz t4, .LBB20_109 +; RV32I-NEXT: lw t5, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t0, s0, .LBB20_108 ; RV32I-NEXT: .LBB20_122: -; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne t1, s3, .LBB20_110 +; RV32I-NEXT: or a6, ra, t5 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: beqz t4, .LBB20_109 ; RV32I-NEXT: .LBB20_123: -; RV32I-NEXT: or a5, ra, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB20_111 +; RV32I-NEXT: lw t5, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t0, s4, .LBB20_110 ; RV32I-NEXT: .LBB20_124: -; RV32I-NEXT: mv a3, s7 -; RV32I-NEXT: bne t1, s5, .LBB20_112 +; RV32I-NEXT: or a6, s11, t5 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: beqz t4, .LBB20_111 ; RV32I-NEXT: .LBB20_125: -; RV32I-NEXT: or a5, s9, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB20_113 +; RV32I-NEXT: lw t5, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t0, s1, .LBB20_112 ; RV32I-NEXT: .LBB20_126: -; RV32I-NEXT: mv a3, s4 -; RV32I-NEXT: bne t1, s0, .LBB20_114 +; RV32I-NEXT: or a6, s9, t5 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: beqz t4, .LBB20_113 ; RV32I-NEXT: .LBB20_127: -; RV32I-NEXT: or a5, s6, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s10, .LBB20_115 +; RV32I-NEXT: mv t5, s3 +; RV32I-NEXT: bne t0, s10, .LBB20_114 ; RV32I-NEXT: .LBB20_128: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s1, .LBB20_116 +; RV32I-NEXT: or a6, s6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, s2, .LBB20_115 ; RV32I-NEXT: .LBB20_129: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s8, .LBB20_117 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, s8, .LBB20_116 ; RV32I-NEXT: .LBB20_130: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beqz t0, .LBB20_118 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, t6, .LBB20_117 ; RV32I-NEXT: .LBB20_131: -; RV32I-NEXT: mv a6, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB20_119 +; RV32I-NEXT: mv t5, a6 +; 
RV32I-NEXT: beqz a5, .LBB20_118 ; RV32I-NEXT: .LBB20_132: -; RV32I-NEXT: mv a3, s11 -; RV32I-NEXT: bnez t1, .LBB20_120 +; RV32I-NEXT: mv a7, t5 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: beqz t4, .LBB20_119 ; RV32I-NEXT: .LBB20_133: -; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: lw a6, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: bnez t0, .LBB20_120 ; RV32I-NEXT: .LBB20_134: -; RV32I-NEXT: lw s11, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB20_148 -; RV32I-NEXT: # %bb.135: -; RV32I-NEXT: beq t1, s2, .LBB20_149 +; RV32I-NEXT: or a6, ra, a6 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: beqz t4, .LBB20_136 +; RV32I-NEXT: .LBB20_135: +; RV32I-NEXT: lw t5, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB20_136: -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beq t0, s0, .LBB20_149 +; RV32I-NEXT: # %bb.137: +; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: bnez t4, .LBB20_150 -; RV32I-NEXT: .LBB20_137: -; RV32I-NEXT: beq t1, s3, .LBB20_151 ; RV32I-NEXT: .LBB20_138: -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: bnez t4, .LBB20_152 +; RV32I-NEXT: beq t0, s4, .LBB20_151 ; RV32I-NEXT: .LBB20_139: -; RV32I-NEXT: beq t1, s5, .LBB20_153 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: bnez t4, .LBB20_152 ; RV32I-NEXT: .LBB20_140: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s0, .LBB20_154 +; RV32I-NEXT: beq t0, s1, .LBB20_153 ; RV32I-NEXT: .LBB20_141: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s10, .LBB20_155 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, s10, .LBB20_154 ; RV32I-NEXT: .LBB20_142: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s1, .LBB20_156 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s2, .LBB20_155 ; RV32I-NEXT: .LBB20_143: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s8, .LBB20_157 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, s8, .LBB20_156 ; RV32I-NEXT: .LBB20_144: -; RV32I-NEXT: bnez t0, .LBB20_158 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, t6, .LBB20_157 ; RV32I-NEXT: .LBB20_145: -; RV32I-NEXT: li 
a3, 0 -; RV32I-NEXT: bnez t4, .LBB20_159 +; RV32I-NEXT: bnez a5, .LBB20_158 ; RV32I-NEXT: .LBB20_146: -; RV32I-NEXT: beqz t1, .LBB20_160 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: bnez t4, .LBB20_159 ; RV32I-NEXT: .LBB20_147: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t0, .LBB20_160 +; RV32I-NEXT: .LBB20_148: +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: bnez t4, .LBB20_161 ; RV32I-NEXT: j .LBB20_162 -; RV32I-NEXT: .LBB20_148: -; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: bne t1, s2, .LBB20_136 ; RV32I-NEXT: .LBB20_149: -; RV32I-NEXT: or a5, ra, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB20_137 +; RV32I-NEXT: or a6, s11, t5 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: beqz t4, .LBB20_138 ; RV32I-NEXT: .LBB20_150: -; RV32I-NEXT: mv a3, s7 -; RV32I-NEXT: bne t1, s3, .LBB20_138 +; RV32I-NEXT: lw t5, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t0, s4, .LBB20_139 ; RV32I-NEXT: .LBB20_151: -; RV32I-NEXT: or a5, s9, a3 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB20_139 +; RV32I-NEXT: or a6, s9, t5 +; RV32I-NEXT: li t5, 0 +; RV32I-NEXT: beqz t4, .LBB20_140 ; RV32I-NEXT: .LBB20_152: -; RV32I-NEXT: mv a3, s4 -; RV32I-NEXT: bne t1, s5, .LBB20_140 +; RV32I-NEXT: mv t5, s3 +; RV32I-NEXT: bne t0, s1, .LBB20_141 ; RV32I-NEXT: .LBB20_153: -; RV32I-NEXT: or a5, s6, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s0, .LBB20_141 +; RV32I-NEXT: or a6, s6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, s10, .LBB20_142 ; RV32I-NEXT: .LBB20_154: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s10, .LBB20_142 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, s2, .LBB20_143 ; RV32I-NEXT: .LBB20_155: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s1, .LBB20_143 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, s8, .LBB20_144 ; RV32I-NEXT: .LBB20_156: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; 
RV32I-NEXT: beq t1, s8, .LBB20_144 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, t6, .LBB20_145 ; RV32I-NEXT: .LBB20_157: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: beqz t0, .LBB20_145 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: beqz a5, .LBB20_146 ; RV32I-NEXT: .LBB20_158: -; RV32I-NEXT: mv a7, a5 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: beqz t4, .LBB20_146 +; RV32I-NEXT: mv t2, a6 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: beqz t4, .LBB20_147 ; RV32I-NEXT: .LBB20_159: -; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: bnez t1, .LBB20_147 +; RV32I-NEXT: lw a6, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: bnez t0, .LBB20_148 ; RV32I-NEXT: .LBB20_160: -; RV32I-NEXT: or a5, ra, a3 -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: or a6, s11, a6 +; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: beqz t4, .LBB20_162 ; RV32I-NEXT: .LBB20_161: -; RV32I-NEXT: mv a3, s7 +; RV32I-NEXT: lw t5, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB20_162: -; RV32I-NEXT: beq t1, s2, .LBB20_174 +; RV32I-NEXT: beq t0, s0, .LBB20_174 ; RV32I-NEXT: # %bb.163: -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: bnez t4, .LBB20_175 ; RV32I-NEXT: .LBB20_164: -; RV32I-NEXT: beq t1, s3, .LBB20_176 +; RV32I-NEXT: beq t0, s4, .LBB20_176 ; RV32I-NEXT: .LBB20_165: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s5, .LBB20_177 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, s1, .LBB20_177 ; RV32I-NEXT: .LBB20_166: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s0, .LBB20_178 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s10, .LBB20_178 ; RV32I-NEXT: .LBB20_167: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s10, .LBB20_179 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, s2, .LBB20_179 ; RV32I-NEXT: .LBB20_168: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s1, .LBB20_180 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s8, .LBB20_180 ; RV32I-NEXT: .LBB20_169: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s8, .LBB20_181 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, t6, 
.LBB20_181 ; RV32I-NEXT: .LBB20_170: -; RV32I-NEXT: bnez t0, .LBB20_182 +; RV32I-NEXT: bnez a5, .LBB20_182 ; RV32I-NEXT: .LBB20_171: -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li a6, 0 ; RV32I-NEXT: bnez t4, .LBB20_183 ; RV32I-NEXT: .LBB20_172: -; RV32I-NEXT: beqz t1, .LBB20_184 +; RV32I-NEXT: beqz t0, .LBB20_184 ; RV32I-NEXT: .LBB20_173: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: bnez t4, .LBB20_185 ; RV32I-NEXT: j .LBB20_186 ; RV32I-NEXT: .LBB20_174: -; RV32I-NEXT: or a5, s9, a3 -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: or a6, s9, t5 +; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: beqz t4, .LBB20_164 ; RV32I-NEXT: .LBB20_175: -; RV32I-NEXT: mv a3, s4 -; RV32I-NEXT: bne t1, s3, .LBB20_165 +; RV32I-NEXT: mv t5, s3 +; RV32I-NEXT: bne t0, s4, .LBB20_165 ; RV32I-NEXT: .LBB20_176: -; RV32I-NEXT: or a5, s6, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s5, .LBB20_166 +; RV32I-NEXT: or a6, s6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, s1, .LBB20_166 ; RV32I-NEXT: .LBB20_177: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s0, .LBB20_167 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, s10, .LBB20_167 ; RV32I-NEXT: .LBB20_178: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s10, .LBB20_168 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, s2, .LBB20_168 ; RV32I-NEXT: .LBB20_179: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s1, .LBB20_169 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, s8, .LBB20_169 ; RV32I-NEXT: .LBB20_180: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s8, .LBB20_170 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, t6, .LBB20_170 ; RV32I-NEXT: .LBB20_181: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beqz t0, .LBB20_171 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: beqz a5, .LBB20_171 ; RV32I-NEXT: 
.LBB20_182: -; RV32I-NEXT: mv a0, a3 -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: mv a0, t5 +; RV32I-NEXT: li a6, 0 ; RV32I-NEXT: beqz t4, .LBB20_172 ; RV32I-NEXT: .LBB20_183: -; RV32I-NEXT: mv a3, s7 -; RV32I-NEXT: bnez t1, .LBB20_173 +; RV32I-NEXT: lw a6, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bnez t0, .LBB20_173 ; RV32I-NEXT: .LBB20_184: -; RV32I-NEXT: or a5, s9, a3 -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: or a6, s9, a6 +; RV32I-NEXT: li t5, 0 ; RV32I-NEXT: beqz t4, .LBB20_186 ; RV32I-NEXT: .LBB20_185: -; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: mv t5, s3 ; RV32I-NEXT: .LBB20_186: -; RV32I-NEXT: beq t1, s2, .LBB20_197 +; RV32I-NEXT: beq t0, s0, .LBB20_197 ; RV32I-NEXT: # %bb.187: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s3, .LBB20_198 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, s4, .LBB20_198 ; RV32I-NEXT: .LBB20_188: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s5, .LBB20_199 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s1, .LBB20_199 ; RV32I-NEXT: .LBB20_189: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s0, .LBB20_200 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, s10, .LBB20_200 ; RV32I-NEXT: .LBB20_190: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s10, .LBB20_201 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s2, .LBB20_201 ; RV32I-NEXT: .LBB20_191: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s1, .LBB20_202 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: bne t0, s8, .LBB20_202 ; RV32I-NEXT: .LBB20_192: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s8, .LBB20_203 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, t6, .LBB20_203 ; RV32I-NEXT: .LBB20_193: -; RV32I-NEXT: bnez t0, .LBB20_204 +; RV32I-NEXT: bnez a5, .LBB20_204 ; RV32I-NEXT: .LBB20_194: -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li a6, 0 ; RV32I-NEXT: bnez t4, .LBB20_205 ; RV32I-NEXT: .LBB20_195: -; RV32I-NEXT: beqz t1, .LBB20_206 +; RV32I-NEXT: beqz t0, .LBB20_206 ; RV32I-NEXT: .LBB20_196: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s2, .LBB20_207 +; 
RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: mv t4, t1 +; RV32I-NEXT: bne t0, s0, .LBB20_207 ; RV32I-NEXT: j .LBB20_208 ; RV32I-NEXT: .LBB20_197: -; RV32I-NEXT: or a5, s6, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s3, .LBB20_188 +; RV32I-NEXT: or a6, s6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, s4, .LBB20_188 ; RV32I-NEXT: .LBB20_198: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s5, .LBB20_189 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, s1, .LBB20_189 ; RV32I-NEXT: .LBB20_199: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s0, .LBB20_190 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, s10, .LBB20_190 ; RV32I-NEXT: .LBB20_200: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s10, .LBB20_191 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, s2, .LBB20_191 ; RV32I-NEXT: .LBB20_201: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s1, .LBB20_192 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: beq t0, s8, .LBB20_192 ; RV32I-NEXT: .LBB20_202: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s8, .LBB20_193 +; RV32I-NEXT: mv t5, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, t6, .LBB20_193 ; RV32I-NEXT: .LBB20_203: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: beqz t0, .LBB20_194 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: beqz a5, .LBB20_194 ; RV32I-NEXT: .LBB20_204: -; RV32I-NEXT: mv t3, a5 -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: mv t3, a6 +; RV32I-NEXT: li a6, 0 ; RV32I-NEXT: beqz t4, .LBB20_195 ; RV32I-NEXT: .LBB20_205: -; RV32I-NEXT: mv a3, s4 -; RV32I-NEXT: bnez t1, .LBB20_196 +; RV32I-NEXT: mv a6, s3 +; RV32I-NEXT: bnez t0, .LBB20_196 ; RV32I-NEXT: .LBB20_206: -; RV32I-NEXT: or a3, s6, a3 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s2, .LBB20_208 +; RV32I-NEXT: or a6, s6, a6 +; RV32I-NEXT: mv t4, t1 +; RV32I-NEXT: beq t0, s0, .LBB20_208 ; 
RV32I-NEXT: .LBB20_207: -; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv t4, a6 ; RV32I-NEXT: .LBB20_208: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s3, .LBB20_217 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s4, .LBB20_217 ; RV32I-NEXT: # %bb.209: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s5, .LBB20_218 +; RV32I-NEXT: mv t4, t1 +; RV32I-NEXT: bne t0, s1, .LBB20_218 ; RV32I-NEXT: .LBB20_210: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s0, .LBB20_219 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s10, .LBB20_219 ; RV32I-NEXT: .LBB20_211: -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: bne t1, s10, .LBB20_220 +; RV32I-NEXT: mv t4, t1 +; RV32I-NEXT: bne t0, s2, .LBB20_220 ; RV32I-NEXT: .LBB20_212: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: bne t1, s1, .LBB20_221 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: bne t0, s8, .LBB20_221 ; RV32I-NEXT: .LBB20_213: -; RV32I-NEXT: bne t1, s8, .LBB20_222 +; RV32I-NEXT: bne t0, t6, .LBB20_222 ; RV32I-NEXT: .LBB20_214: -; RV32I-NEXT: beqz t0, .LBB20_216 +; RV32I-NEXT: beqz a5, .LBB20_216 ; RV32I-NEXT: .LBB20_215: -; RV32I-NEXT: mv a1, t2 +; RV32I-NEXT: mv a1, t1 ; RV32I-NEXT: .LBB20_216: -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: srli a5, ra, 16 -; RV32I-NEXT: lui t4, 16 -; RV32I-NEXT: srli t2, ra, 24 -; RV32I-NEXT: srli t0, s11, 16 -; RV32I-NEXT: srli t6, s11, 24 -; RV32I-NEXT: srli t1, a4, 16 -; RV32I-NEXT: srli s2, a4, 24 -; RV32I-NEXT: srli t5, a6, 16 -; RV32I-NEXT: srli s3, a6, 24 -; RV32I-NEXT: srli s1, a7, 16 -; RV32I-NEXT: srli a3, a7, 24 -; RV32I-NEXT: srli s0, a0, 16 -; RV32I-NEXT: srli s5, a0, 24 -; RV32I-NEXT: srli s4, t3, 16 -; RV32I-NEXT: srli s6, t3, 24 -; RV32I-NEXT: srli s7, a1, 16 -; RV32I-NEXT: srli s8, a1, 24 -; RV32I-NEXT: addi t4, t4, -1 -; RV32I-NEXT: and s9, ra, t4 -; RV32I-NEXT: and s10, s11, t4 -; RV32I-NEXT: srli s9, s9, 8 -; RV32I-NEXT: sb ra, 0(a2) -; RV32I-NEXT: sb s9, 1(a2) -; RV32I-NEXT: sb a5, 2(a2) -; RV32I-NEXT: sb t2, 3(a2) -; RV32I-NEXT: and a5, a4, t4 -; RV32I-NEXT: srli t2, 
s10, 8 -; RV32I-NEXT: sb s11, 4(a2) -; RV32I-NEXT: sb t2, 5(a2) -; RV32I-NEXT: sb t0, 6(a2) -; RV32I-NEXT: sb t6, 7(a2) -; RV32I-NEXT: and t0, a6, t4 -; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: lui a5, 16 +; RV32I-NEXT: addi a5, a5, -1 +; RV32I-NEXT: srli a6, s5, 16 +; RV32I-NEXT: and t0, s5, a5 +; RV32I-NEXT: srli t1, s5, 24 +; RV32I-NEXT: srli t0, t0, 8 +; RV32I-NEXT: sb s5, 0(a2) +; RV32I-NEXT: sb t0, 1(a2) +; RV32I-NEXT: sb a6, 2(a2) +; RV32I-NEXT: sb t1, 3(a2) +; RV32I-NEXT: srli a6, a3, 16 +; RV32I-NEXT: and t0, a3, a5 +; RV32I-NEXT: srli t0, t0, 8 +; RV32I-NEXT: srli t1, a3, 24 +; RV32I-NEXT: sb a3, 4(a2) +; RV32I-NEXT: sb t0, 5(a2) +; RV32I-NEXT: sb a6, 6(a2) +; RV32I-NEXT: sb t1, 7(a2) +; RV32I-NEXT: srli a3, a4, 16 +; RV32I-NEXT: and a6, a4, a5 +; RV32I-NEXT: srli a6, a6, 8 +; RV32I-NEXT: srli t0, a4, 24 ; RV32I-NEXT: sb a4, 8(a2) -; RV32I-NEXT: sb a5, 9(a2) -; RV32I-NEXT: sb t1, 10(a2) -; RV32I-NEXT: sb s2, 11(a2) -; RV32I-NEXT: and a4, a7, t4 -; RV32I-NEXT: srli a5, t0, 8 -; RV32I-NEXT: sb a6, 12(a2) -; RV32I-NEXT: sb a5, 13(a2) -; RV32I-NEXT: sb t5, 14(a2) -; RV32I-NEXT: sb s3, 15(a2) -; RV32I-NEXT: and a5, a0, t4 +; RV32I-NEXT: sb a6, 9(a2) +; RV32I-NEXT: sb a3, 10(a2) +; RV32I-NEXT: sb t0, 11(a2) +; RV32I-NEXT: srli a3, a7, 16 +; RV32I-NEXT: and a4, a7, a5 ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a7, 16(a2) +; RV32I-NEXT: srli a6, a7, 24 +; RV32I-NEXT: sb a7, 12(a2) +; RV32I-NEXT: sb a4, 13(a2) +; RV32I-NEXT: sb a3, 14(a2) +; RV32I-NEXT: sb a6, 15(a2) +; RV32I-NEXT: srli a3, t2, 16 +; RV32I-NEXT: and a4, t2, a5 +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: srli a6, t2, 24 +; RV32I-NEXT: sb t2, 16(a2) ; RV32I-NEXT: sb a4, 17(a2) -; RV32I-NEXT: sb s1, 18(a2) -; RV32I-NEXT: sb a3, 19(a2) -; RV32I-NEXT: and a3, t3, t4 -; RV32I-NEXT: and a4, a1, t4 -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: sb a3, 18(a2) +; RV32I-NEXT: sb a6, 19(a2) +; RV32I-NEXT: srli a3, a0, 16 +; RV32I-NEXT: and a4, a0, a5 ; RV32I-NEXT: srli a4, 
a4, 8 +; RV32I-NEXT: srli a6, a0, 24 ; RV32I-NEXT: sb a0, 20(a2) -; RV32I-NEXT: sb a5, 21(a2) -; RV32I-NEXT: sb s0, 22(a2) -; RV32I-NEXT: sb s5, 23(a2) +; RV32I-NEXT: sb a4, 21(a2) +; RV32I-NEXT: sb a3, 22(a2) +; RV32I-NEXT: sb a6, 23(a2) +; RV32I-NEXT: srli a0, t3, 16 +; RV32I-NEXT: and a3, t3, a5 +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: srli a4, t3, 24 ; RV32I-NEXT: sb t3, 24(a2) ; RV32I-NEXT: sb a3, 25(a2) -; RV32I-NEXT: sb s4, 26(a2) -; RV32I-NEXT: sb s6, 27(a2) +; RV32I-NEXT: sb a0, 26(a2) +; RV32I-NEXT: sb a4, 27(a2) +; RV32I-NEXT: srli a0, a1, 16 +; RV32I-NEXT: and a5, a1, a5 +; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: srli a3, a1, 24 ; RV32I-NEXT: sb a1, 28(a2) -; RV32I-NEXT: sb a4, 29(a2) -; RV32I-NEXT: sb s7, 30(a2) -; RV32I-NEXT: sb s8, 31(a2) -; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s7, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s10, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s11, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 80 +; RV32I-NEXT: sb a5, 29(a2) +; RV32I-NEXT: sb a0, 30(a2) +; RV32I-NEXT: sb a3, 31(a2) +; RV32I-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw 
s7, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s9, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s10, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s11, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB20_217: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s5, .LBB20_210 +; RV32I-NEXT: mv a6, t4 +; RV32I-NEXT: mv t4, t1 +; RV32I-NEXT: beq t0, s1, .LBB20_210 ; RV32I-NEXT: .LBB20_218: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s0, .LBB20_211 +; RV32I-NEXT: mv t4, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, s10, .LBB20_211 ; RV32I-NEXT: .LBB20_219: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: mv a5, t2 -; RV32I-NEXT: beq t1, s10, .LBB20_212 +; RV32I-NEXT: mv a6, t4 +; RV32I-NEXT: mv t4, t1 +; RV32I-NEXT: beq t0, s2, .LBB20_212 ; RV32I-NEXT: .LBB20_220: -; RV32I-NEXT: mv a5, a3 -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: beq t1, s1, .LBB20_213 +; RV32I-NEXT: mv t4, a6 +; RV32I-NEXT: mv a6, t1 +; RV32I-NEXT: beq t0, s8, .LBB20_213 ; RV32I-NEXT: .LBB20_221: -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beq t1, s8, .LBB20_214 +; RV32I-NEXT: mv a6, t4 +; RV32I-NEXT: beq t0, t6, .LBB20_214 ; RV32I-NEXT: .LBB20_222: -; RV32I-NEXT: mv t2, a3 -; RV32I-NEXT: bnez t0, .LBB20_215 +; RV32I-NEXT: mv t1, a6 +; RV32I-NEXT: bnez a5, .LBB20_215 ; RV32I-NEXT: j .LBB20_216 %src = load i256, ptr %src.ptr, align 1 %dwordOff = load i256, ptr %dwordOff.ptr, align 1 diff --git a/llvm/test/CodeGen/RISCV/abds-neg.ll b/llvm/test/CodeGen/RISCV/abds-neg.ll index 41f73f51fe7b6..39dd6d620f07c 100644 --- a/llvm/test/CodeGen/RISCV/abds-neg.ll +++ b/llvm/test/CodeGen/RISCV/abds-neg.ll @@ -377,8 +377,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { ; ; RV64I-LABEL: abd_ext_i32_i16: ; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: slli a1, a1, 48 +; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: srai a1, a1, 48 ; RV64I-NEXT: sub a0, 
a0, a1 ; RV64I-NEXT: srai a1, a0, 63 @@ -480,8 +480,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { ; RV32I-NEXT: sub a0, a0, a2 ; RV32I-NEXT: .LBB9_5: ; RV32I-NEXT: snez a2, a0 -; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: sub a1, a1, a2 ; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: ret ; @@ -518,8 +518,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { ; RV32ZBB-NEXT: sub a0, a0, a2 ; RV32ZBB-NEXT: .LBB9_5: ; RV32ZBB-NEXT: snez a2, a0 -; RV32ZBB-NEXT: add a1, a1, a2 ; RV32ZBB-NEXT: neg a1, a1 +; RV32ZBB-NEXT: sub a1, a1, a2 ; RV32ZBB-NEXT: neg a0, a0 ; RV32ZBB-NEXT: ret ; @@ -560,8 +560,8 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { ; RV32I-NEXT: sub a0, a0, a2 ; RV32I-NEXT: .LBB10_5: ; RV32I-NEXT: snez a2, a0 -; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: sub a1, a1, a2 ; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: ret ; @@ -598,8 +598,8 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { ; RV32ZBB-NEXT: sub a0, a0, a2 ; RV32ZBB-NEXT: .LBB10_5: ; RV32ZBB-NEXT: snez a2, a0 -; RV32ZBB-NEXT: add a1, a1, a2 ; RV32ZBB-NEXT: neg a1, a1 +; RV32ZBB-NEXT: sub a1, a1, a2 ; RV32ZBB-NEXT: neg a0, a0 ; RV32ZBB-NEXT: ret ; @@ -621,15 +621,15 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_ext_i128: ; RV32I: # %bb.0: +; RV32I-NEXT: lw a6, 8(a2) ; RV32I-NEXT: lw a3, 0(a1) ; RV32I-NEXT: lw a4, 4(a1) -; RV32I-NEXT: lw a6, 8(a1) +; RV32I-NEXT: lw t0, 8(a1) ; RV32I-NEXT: lw t1, 12(a1) -; RV32I-NEXT: lw a1, 0(a2) -; RV32I-NEXT: lw t0, 8(a2) ; RV32I-NEXT: lw t2, 12(a2) +; RV32I-NEXT: lw a1, 0(a2) ; RV32I-NEXT: lw a2, 4(a2) -; RV32I-NEXT: sltu t3, t0, a6 +; RV32I-NEXT: sltu t3, a6, t0 ; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: beq t1, t2, .LBB11_2 ; RV32I-NEXT: # %bb.1: @@ -645,7 +645,7 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: 
xor t5, t1, t2 -; RV32I-NEXT: xor s0, a6, t0 +; RV32I-NEXT: xor s0, t0, a6 ; RV32I-NEXT: or t5, s0, t5 ; RV32I-NEXT: beqz t5, .LBB11_6 ; RV32I-NEXT: # %bb.5: @@ -665,16 +665,16 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: bnez a7, .LBB11_12 ; RV32I-NEXT: # %bb.11: ; RV32I-NEXT: sub t1, t2, t1 -; RV32I-NEXT: sub a6, t0, a6 +; RV32I-NEXT: sub a6, a6, t0 ; RV32I-NEXT: sub t0, t1, t3 ; RV32I-NEXT: sltu t1, a6, t5 ; RV32I-NEXT: sub t0, t0, t1 ; RV32I-NEXT: sub a6, a6, t5 ; RV32I-NEXT: j .LBB11_13 ; RV32I-NEXT: .LBB11_12: -; RV32I-NEXT: sltu t3, a6, t0 +; RV32I-NEXT: sltu t3, t0, a6 ; RV32I-NEXT: sub t1, t1, t2 -; RV32I-NEXT: sub a6, a6, t0 +; RV32I-NEXT: sub a6, t0, a6 ; RV32I-NEXT: sub t0, t1, t3 ; RV32I-NEXT: sltu t1, a6, t6 ; RV32I-NEXT: sub t0, t0, t1 @@ -696,18 +696,18 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: or a3, a1, a2 ; RV32I-NEXT: neg a4, a6 ; RV32I-NEXT: neg a5, t0 -; RV32I-NEXT: snez a6, a1 -; RV32I-NEXT: neg a1, a1 ; RV32I-NEXT: snez a3, a3 -; RV32I-NEXT: add a2, a2, a6 -; RV32I-NEXT: sltu a6, a4, a3 +; RV32I-NEXT: snez a6, a1 +; RV32I-NEXT: sltu a7, a4, a3 ; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: sub a5, a5, a7 +; RV32I-NEXT: sub a2, a2, a6 ; RV32I-NEXT: sub a4, a4, a3 -; RV32I-NEXT: sub a3, a5, a6 +; RV32I-NEXT: neg a1, a1 ; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a2, 4(a0) ; RV32I-NEXT: sw a4, 8(a0) -; RV32I-NEXT: sw a3, 12(a0) +; RV32I-NEXT: sw a5, 12(a0) ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret @@ -733,22 +733,22 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV64I-NEXT: sub a0, a0, a2 ; RV64I-NEXT: .LBB11_5: ; RV64I-NEXT: snez a2, a0 -; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: neg a1, a1 +; RV64I-NEXT: sub a1, a1, a2 ; RV64I-NEXT: neg a0, a0 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: abd_ext_i128: ; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: lw a6, 8(a2) ; RV32ZBB-NEXT: lw a3, 0(a1) ; RV32ZBB-NEXT: lw a4, 4(a1) -; RV32ZBB-NEXT: lw a6, 
8(a1) +; RV32ZBB-NEXT: lw t0, 8(a1) ; RV32ZBB-NEXT: lw t1, 12(a1) -; RV32ZBB-NEXT: lw a1, 0(a2) -; RV32ZBB-NEXT: lw t0, 8(a2) ; RV32ZBB-NEXT: lw t2, 12(a2) +; RV32ZBB-NEXT: lw a1, 0(a2) ; RV32ZBB-NEXT: lw a2, 4(a2) -; RV32ZBB-NEXT: sltu t3, t0, a6 +; RV32ZBB-NEXT: sltu t3, a6, t0 ; RV32ZBB-NEXT: mv t4, t3 ; RV32ZBB-NEXT: beq t1, t2, .LBB11_2 ; RV32ZBB-NEXT: # %bb.1: @@ -764,7 +764,7 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: addi sp, sp, -16 ; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32ZBB-NEXT: xor t5, t1, t2 -; RV32ZBB-NEXT: xor s0, a6, t0 +; RV32ZBB-NEXT: xor s0, t0, a6 ; RV32ZBB-NEXT: or t5, s0, t5 ; RV32ZBB-NEXT: beqz t5, .LBB11_6 ; RV32ZBB-NEXT: # %bb.5: @@ -784,16 +784,16 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: bnez a7, .LBB11_12 ; RV32ZBB-NEXT: # %bb.11: ; RV32ZBB-NEXT: sub t1, t2, t1 -; RV32ZBB-NEXT: sub a6, t0, a6 +; RV32ZBB-NEXT: sub a6, a6, t0 ; RV32ZBB-NEXT: sub t0, t1, t3 ; RV32ZBB-NEXT: sltu t1, a6, t5 ; RV32ZBB-NEXT: sub t0, t0, t1 ; RV32ZBB-NEXT: sub a6, a6, t5 ; RV32ZBB-NEXT: j .LBB11_13 ; RV32ZBB-NEXT: .LBB11_12: -; RV32ZBB-NEXT: sltu t3, a6, t0 +; RV32ZBB-NEXT: sltu t3, t0, a6 ; RV32ZBB-NEXT: sub t1, t1, t2 -; RV32ZBB-NEXT: sub a6, a6, t0 +; RV32ZBB-NEXT: sub a6, t0, a6 ; RV32ZBB-NEXT: sub t0, t1, t3 ; RV32ZBB-NEXT: sltu t1, a6, t6 ; RV32ZBB-NEXT: sub t0, t0, t1 @@ -815,18 +815,18 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: or a3, a1, a2 ; RV32ZBB-NEXT: neg a4, a6 ; RV32ZBB-NEXT: neg a5, t0 -; RV32ZBB-NEXT: snez a6, a1 -; RV32ZBB-NEXT: neg a1, a1 ; RV32ZBB-NEXT: snez a3, a3 -; RV32ZBB-NEXT: add a2, a2, a6 -; RV32ZBB-NEXT: sltu a6, a4, a3 +; RV32ZBB-NEXT: snez a6, a1 +; RV32ZBB-NEXT: sltu a7, a4, a3 ; RV32ZBB-NEXT: neg a2, a2 +; RV32ZBB-NEXT: sub a5, a5, a7 +; RV32ZBB-NEXT: sub a2, a2, a6 ; RV32ZBB-NEXT: sub a4, a4, a3 -; RV32ZBB-NEXT: sub a3, a5, a6 +; RV32ZBB-NEXT: neg a1, a1 ; RV32ZBB-NEXT: sw a1, 0(a0) ; RV32ZBB-NEXT: sw a2, 4(a0) ; 
RV32ZBB-NEXT: sw a4, 8(a0) -; RV32ZBB-NEXT: sw a3, 12(a0) +; RV32ZBB-NEXT: sw a5, 12(a0) ; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZBB-NEXT: addi sp, sp, 16 ; RV32ZBB-NEXT: ret @@ -852,8 +852,8 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV64ZBB-NEXT: sub a0, a0, a2 ; RV64ZBB-NEXT: .LBB11_5: ; RV64ZBB-NEXT: snez a2, a0 -; RV64ZBB-NEXT: add a1, a1, a2 ; RV64ZBB-NEXT: neg a1, a1 +; RV64ZBB-NEXT: sub a1, a1, a2 ; RV64ZBB-NEXT: neg a0, a0 ; RV64ZBB-NEXT: ret %aext = sext i128 %a to i256 @@ -868,15 +868,15 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_ext_i128_undef: ; RV32I: # %bb.0: +; RV32I-NEXT: lw a6, 8(a2) ; RV32I-NEXT: lw a3, 0(a1) ; RV32I-NEXT: lw a4, 4(a1) -; RV32I-NEXT: lw a6, 8(a1) +; RV32I-NEXT: lw t0, 8(a1) ; RV32I-NEXT: lw t1, 12(a1) -; RV32I-NEXT: lw a1, 0(a2) -; RV32I-NEXT: lw t0, 8(a2) ; RV32I-NEXT: lw t2, 12(a2) +; RV32I-NEXT: lw a1, 0(a2) ; RV32I-NEXT: lw a2, 4(a2) -; RV32I-NEXT: sltu t3, t0, a6 +; RV32I-NEXT: sltu t3, a6, t0 ; RV32I-NEXT: mv t4, t3 ; RV32I-NEXT: beq t1, t2, .LBB12_2 ; RV32I-NEXT: # %bb.1: @@ -892,7 +892,7 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: xor t5, t1, t2 -; RV32I-NEXT: xor s0, a6, t0 +; RV32I-NEXT: xor s0, t0, a6 ; RV32I-NEXT: or t5, s0, t5 ; RV32I-NEXT: beqz t5, .LBB12_6 ; RV32I-NEXT: # %bb.5: @@ -912,16 +912,16 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: bnez a7, .LBB12_12 ; RV32I-NEXT: # %bb.11: ; RV32I-NEXT: sub t1, t2, t1 -; RV32I-NEXT: sub a6, t0, a6 +; RV32I-NEXT: sub a6, a6, t0 ; RV32I-NEXT: sub t0, t1, t3 ; RV32I-NEXT: sltu t1, a6, t5 ; RV32I-NEXT: sub t0, t0, t1 ; RV32I-NEXT: sub a6, a6, t5 ; RV32I-NEXT: j .LBB12_13 ; RV32I-NEXT: .LBB12_12: -; RV32I-NEXT: sltu t3, a6, t0 +; RV32I-NEXT: sltu t3, t0, a6 ; RV32I-NEXT: sub t1, t1, t2 -; RV32I-NEXT: 
sub a6, a6, t0 +; RV32I-NEXT: sub a6, t0, a6 ; RV32I-NEXT: sub t0, t1, t3 ; RV32I-NEXT: sltu t1, a6, t6 ; RV32I-NEXT: sub t0, t0, t1 @@ -943,18 +943,18 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: or a3, a1, a2 ; RV32I-NEXT: neg a4, a6 ; RV32I-NEXT: neg a5, t0 -; RV32I-NEXT: snez a6, a1 -; RV32I-NEXT: neg a1, a1 ; RV32I-NEXT: snez a3, a3 -; RV32I-NEXT: add a2, a2, a6 -; RV32I-NEXT: sltu a6, a4, a3 +; RV32I-NEXT: snez a6, a1 +; RV32I-NEXT: sltu a7, a4, a3 ; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: sub a5, a5, a7 +; RV32I-NEXT: sub a2, a2, a6 ; RV32I-NEXT: sub a4, a4, a3 -; RV32I-NEXT: sub a3, a5, a6 +; RV32I-NEXT: neg a1, a1 ; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a2, 4(a0) ; RV32I-NEXT: sw a4, 8(a0) -; RV32I-NEXT: sw a3, 12(a0) +; RV32I-NEXT: sw a5, 12(a0) ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret @@ -980,22 +980,22 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV64I-NEXT: sub a0, a0, a2 ; RV64I-NEXT: .LBB12_5: ; RV64I-NEXT: snez a2, a0 -; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: neg a1, a1 +; RV64I-NEXT: sub a1, a1, a2 ; RV64I-NEXT: neg a0, a0 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: abd_ext_i128_undef: ; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: lw a6, 8(a2) ; RV32ZBB-NEXT: lw a3, 0(a1) ; RV32ZBB-NEXT: lw a4, 4(a1) -; RV32ZBB-NEXT: lw a6, 8(a1) +; RV32ZBB-NEXT: lw t0, 8(a1) ; RV32ZBB-NEXT: lw t1, 12(a1) -; RV32ZBB-NEXT: lw a1, 0(a2) -; RV32ZBB-NEXT: lw t0, 8(a2) ; RV32ZBB-NEXT: lw t2, 12(a2) +; RV32ZBB-NEXT: lw a1, 0(a2) ; RV32ZBB-NEXT: lw a2, 4(a2) -; RV32ZBB-NEXT: sltu t3, t0, a6 +; RV32ZBB-NEXT: sltu t3, a6, t0 ; RV32ZBB-NEXT: mv t4, t3 ; RV32ZBB-NEXT: beq t1, t2, .LBB12_2 ; RV32ZBB-NEXT: # %bb.1: @@ -1011,7 +1011,7 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: addi sp, sp, -16 ; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32ZBB-NEXT: xor t5, t1, t2 -; RV32ZBB-NEXT: xor s0, a6, t0 +; RV32ZBB-NEXT: xor s0, t0, a6 ; 
RV32ZBB-NEXT: or t5, s0, t5 ; RV32ZBB-NEXT: beqz t5, .LBB12_6 ; RV32ZBB-NEXT: # %bb.5: @@ -1031,16 +1031,16 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: bnez a7, .LBB12_12 ; RV32ZBB-NEXT: # %bb.11: ; RV32ZBB-NEXT: sub t1, t2, t1 -; RV32ZBB-NEXT: sub a6, t0, a6 +; RV32ZBB-NEXT: sub a6, a6, t0 ; RV32ZBB-NEXT: sub t0, t1, t3 ; RV32ZBB-NEXT: sltu t1, a6, t5 ; RV32ZBB-NEXT: sub t0, t0, t1 ; RV32ZBB-NEXT: sub a6, a6, t5 ; RV32ZBB-NEXT: j .LBB12_13 ; RV32ZBB-NEXT: .LBB12_12: -; RV32ZBB-NEXT: sltu t3, a6, t0 +; RV32ZBB-NEXT: sltu t3, t0, a6 ; RV32ZBB-NEXT: sub t1, t1, t2 -; RV32ZBB-NEXT: sub a6, a6, t0 +; RV32ZBB-NEXT: sub a6, t0, a6 ; RV32ZBB-NEXT: sub t0, t1, t3 ; RV32ZBB-NEXT: sltu t1, a6, t6 ; RV32ZBB-NEXT: sub t0, t0, t1 @@ -1062,18 +1062,18 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: or a3, a1, a2 ; RV32ZBB-NEXT: neg a4, a6 ; RV32ZBB-NEXT: neg a5, t0 -; RV32ZBB-NEXT: snez a6, a1 -; RV32ZBB-NEXT: neg a1, a1 ; RV32ZBB-NEXT: snez a3, a3 -; RV32ZBB-NEXT: add a2, a2, a6 -; RV32ZBB-NEXT: sltu a6, a4, a3 +; RV32ZBB-NEXT: snez a6, a1 +; RV32ZBB-NEXT: sltu a7, a4, a3 ; RV32ZBB-NEXT: neg a2, a2 +; RV32ZBB-NEXT: sub a5, a5, a7 +; RV32ZBB-NEXT: sub a2, a2, a6 ; RV32ZBB-NEXT: sub a4, a4, a3 -; RV32ZBB-NEXT: sub a3, a5, a6 +; RV32ZBB-NEXT: neg a1, a1 ; RV32ZBB-NEXT: sw a1, 0(a0) ; RV32ZBB-NEXT: sw a2, 4(a0) ; RV32ZBB-NEXT: sw a4, 8(a0) -; RV32ZBB-NEXT: sw a3, 12(a0) +; RV32ZBB-NEXT: sw a5, 12(a0) ; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZBB-NEXT: addi sp, sp, 16 ; RV32ZBB-NEXT: ret @@ -1099,8 +1099,8 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV64ZBB-NEXT: sub a0, a0, a2 ; RV64ZBB-NEXT: .LBB12_5: ; RV64ZBB-NEXT: snez a2, a0 -; RV64ZBB-NEXT: add a1, a1, a2 ; RV64ZBB-NEXT: neg a1, a1 +; RV64ZBB-NEXT: sub a1, a1, a2 ; RV64ZBB-NEXT: neg a0, a0 ; RV64ZBB-NEXT: ret %aext = sext i128 %a to i256 @@ -1385,8 +1385,8 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; 
RV32I-NEXT: lw a6, 4(a2) ; RV32I-NEXT: lw a7, 8(a2) ; RV32I-NEXT: lw t0, 12(a2) -; RV32I-NEXT: lw a3, 4(a1) ; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a3, 4(a1) ; RV32I-NEXT: lw a4, 8(a1) ; RV32I-NEXT: beq a5, t0, .LBB17_2 ; RV32I-NEXT: # %bb.1: @@ -1460,11 +1460,11 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: .LBB17_21: ; RV32I-NEXT: sub a4, t3, a4 ; RV32I-NEXT: sub a3, t1, a3 -; RV32I-NEXT: sub a2, a2, a1 -; RV32I-NEXT: sltu a1, a4, a7 +; RV32I-NEXT: sltu t0, a4, a7 ; RV32I-NEXT: sub a4, a4, a7 ; RV32I-NEXT: sub a3, a3, a6 -; RV32I-NEXT: sub a5, a5, a1 +; RV32I-NEXT: sub a5, a5, t0 +; RV32I-NEXT: sub a2, a2, a1 ; RV32I-NEXT: sw a2, 0(a0) ; RV32I-NEXT: sw a3, 4(a0) ; RV32I-NEXT: sw a4, 8(a0) @@ -1512,8 +1512,8 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: lw a6, 4(a2) ; RV32ZBB-NEXT: lw a7, 8(a2) ; RV32ZBB-NEXT: lw t0, 12(a2) -; RV32ZBB-NEXT: lw a3, 4(a1) ; RV32ZBB-NEXT: lw a5, 12(a1) +; RV32ZBB-NEXT: lw a3, 4(a1) ; RV32ZBB-NEXT: lw a4, 8(a1) ; RV32ZBB-NEXT: beq a5, t0, .LBB17_2 ; RV32ZBB-NEXT: # %bb.1: @@ -1587,11 +1587,11 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: .LBB17_21: ; RV32ZBB-NEXT: sub a4, t3, a4 ; RV32ZBB-NEXT: sub a3, t1, a3 -; RV32ZBB-NEXT: sub a2, a2, a1 -; RV32ZBB-NEXT: sltu a1, a4, a7 +; RV32ZBB-NEXT: sltu t0, a4, a7 ; RV32ZBB-NEXT: sub a4, a4, a7 ; RV32ZBB-NEXT: sub a3, a3, a6 -; RV32ZBB-NEXT: sub a5, a5, a1 +; RV32ZBB-NEXT: sub a5, a5, t0 +; RV32ZBB-NEXT: sub a2, a2, a1 ; RV32ZBB-NEXT: sw a2, 0(a0) ; RV32ZBB-NEXT: sw a3, 4(a0) ; RV32ZBB-NEXT: sw a4, 8(a0) @@ -1860,19 +1860,19 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_cmp_i128: ; RV32I: # %bb.0: +; RV32I-NEXT: lw a5, 8(a1) ; RV32I-NEXT: lw a3, 0(a2) ; RV32I-NEXT: lw a4, 4(a2) -; RV32I-NEXT: lw a5, 8(a2) +; RV32I-NEXT: lw a6, 8(a2) ; RV32I-NEXT: lw a7, 12(a2) +; RV32I-NEXT: lw t0, 12(a1) ; RV32I-NEXT: lw a2, 0(a1) -; 
RV32I-NEXT: lw a6, 8(a1) -; RV32I-NEXT: lw t1, 12(a1) ; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu t0, a6, a5 -; RV32I-NEXT: mv t4, t0 -; RV32I-NEXT: beq t1, a7, .LBB22_2 +; RV32I-NEXT: sltu t1, a5, a6 +; RV32I-NEXT: mv t4, t1 +; RV32I-NEXT: beq t0, a7, .LBB22_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt t4, t1, a7 +; RV32I-NEXT: slt t4, t0, a7 ; RV32I-NEXT: .LBB22_2: ; RV32I-NEXT: sltu t2, a2, a3 ; RV32I-NEXT: mv t3, t2 @@ -1880,8 +1880,8 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: sltu t3, a1, a4 ; RV32I-NEXT: .LBB22_4: -; RV32I-NEXT: xor t5, t1, a7 -; RV32I-NEXT: xor t6, a6, a5 +; RV32I-NEXT: xor t5, t0, a7 +; RV32I-NEXT: xor t6, a5, a6 ; RV32I-NEXT: or t5, t6, t5 ; RV32I-NEXT: mv t6, t3 ; RV32I-NEXT: beqz t5, .LBB22_6 @@ -1896,32 +1896,32 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: .LBB22_8: ; RV32I-NEXT: bnez t6, .LBB22_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: sltu t0, a5, a6 -; RV32I-NEXT: sub a7, a7, t1 -; RV32I-NEXT: sub a5, a5, a6 +; RV32I-NEXT: sltu t1, a6, a5 +; RV32I-NEXT: sub a7, a7, t0 +; RV32I-NEXT: sub a6, a6, a5 +; RV32I-NEXT: sub a5, a7, t1 +; RV32I-NEXT: sltu a7, a6, t5 +; RV32I-NEXT: sub a5, a5, a7 +; RV32I-NEXT: sub a6, a6, t5 ; RV32I-NEXT: sub a4, a4, a1 -; RV32I-NEXT: sub a6, a7, t0 -; RV32I-NEXT: sltu a7, a5, t5 -; RV32I-NEXT: sub a1, a5, t5 -; RV32I-NEXT: sub a5, a4, t4 -; RV32I-NEXT: sub a4, a6, a7 +; RV32I-NEXT: sub a1, a4, t4 ; RV32I-NEXT: sub a2, a3, a2 ; RV32I-NEXT: j .LBB22_11 ; RV32I-NEXT: .LBB22_10: -; RV32I-NEXT: sub a7, t1, a7 -; RV32I-NEXT: sub a5, a6, a5 -; RV32I-NEXT: sub a4, a1, a4 -; RV32I-NEXT: sub a6, a7, t0 -; RV32I-NEXT: sltu a7, a5, t3 -; RV32I-NEXT: sub a1, a5, t3 -; RV32I-NEXT: sub a5, a4, t2 -; RV32I-NEXT: sub a4, a6, a7 +; RV32I-NEXT: sub a7, t0, a7 +; RV32I-NEXT: sub a6, a5, a6 +; RV32I-NEXT: sub a5, a7, t1 +; RV32I-NEXT: sltu a7, a6, t3 +; RV32I-NEXT: sub a1, a1, a4 +; RV32I-NEXT: sub a5, a5, a7 +; RV32I-NEXT: sub a6, a6, t3 +; 
RV32I-NEXT: sub a1, a1, t2 ; RV32I-NEXT: sub a2, a2, a3 ; RV32I-NEXT: .LBB22_11: ; RV32I-NEXT: sw a2, 0(a0) -; RV32I-NEXT: sw a5, 4(a0) -; RV32I-NEXT: sw a1, 8(a0) -; RV32I-NEXT: sw a4, 12(a0) +; RV32I-NEXT: sw a1, 4(a0) +; RV32I-NEXT: sw a6, 8(a0) +; RV32I-NEXT: sw a5, 12(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_cmp_i128: @@ -1947,19 +1947,19 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_cmp_i128: ; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: lw a5, 8(a1) ; RV32ZBB-NEXT: lw a3, 0(a2) ; RV32ZBB-NEXT: lw a4, 4(a2) -; RV32ZBB-NEXT: lw a5, 8(a2) +; RV32ZBB-NEXT: lw a6, 8(a2) ; RV32ZBB-NEXT: lw a7, 12(a2) +; RV32ZBB-NEXT: lw t0, 12(a1) ; RV32ZBB-NEXT: lw a2, 0(a1) -; RV32ZBB-NEXT: lw a6, 8(a1) -; RV32ZBB-NEXT: lw t1, 12(a1) ; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu t0, a6, a5 -; RV32ZBB-NEXT: mv t4, t0 -; RV32ZBB-NEXT: beq t1, a7, .LBB22_2 +; RV32ZBB-NEXT: sltu t1, a5, a6 +; RV32ZBB-NEXT: mv t4, t1 +; RV32ZBB-NEXT: beq t0, a7, .LBB22_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: slt t4, t1, a7 +; RV32ZBB-NEXT: slt t4, t0, a7 ; RV32ZBB-NEXT: .LBB22_2: ; RV32ZBB-NEXT: sltu t2, a2, a3 ; RV32ZBB-NEXT: mv t3, t2 @@ -1967,8 +1967,8 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: # %bb.3: ; RV32ZBB-NEXT: sltu t3, a1, a4 ; RV32ZBB-NEXT: .LBB22_4: -; RV32ZBB-NEXT: xor t5, t1, a7 -; RV32ZBB-NEXT: xor t6, a6, a5 +; RV32ZBB-NEXT: xor t5, t0, a7 +; RV32ZBB-NEXT: xor t6, a5, a6 ; RV32ZBB-NEXT: or t5, t6, t5 ; RV32ZBB-NEXT: mv t6, t3 ; RV32ZBB-NEXT: beqz t5, .LBB22_6 @@ -1983,32 +1983,32 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: .LBB22_8: ; RV32ZBB-NEXT: bnez t6, .LBB22_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: sltu t0, a5, a6 -; RV32ZBB-NEXT: sub a7, a7, t1 -; RV32ZBB-NEXT: sub a5, a5, a6 +; RV32ZBB-NEXT: sltu t1, a6, a5 +; RV32ZBB-NEXT: sub a7, a7, t0 +; RV32ZBB-NEXT: sub a6, a6, a5 +; RV32ZBB-NEXT: sub a5, a7, t1 +; RV32ZBB-NEXT: sltu a7, a6, t5 +; RV32ZBB-NEXT: sub a5, a5, a7 +; 
RV32ZBB-NEXT: sub a6, a6, t5 ; RV32ZBB-NEXT: sub a4, a4, a1 -; RV32ZBB-NEXT: sub a6, a7, t0 -; RV32ZBB-NEXT: sltu a7, a5, t5 -; RV32ZBB-NEXT: sub a1, a5, t5 -; RV32ZBB-NEXT: sub a5, a4, t4 -; RV32ZBB-NEXT: sub a4, a6, a7 +; RV32ZBB-NEXT: sub a1, a4, t4 ; RV32ZBB-NEXT: sub a2, a3, a2 ; RV32ZBB-NEXT: j .LBB22_11 ; RV32ZBB-NEXT: .LBB22_10: -; RV32ZBB-NEXT: sub a7, t1, a7 -; RV32ZBB-NEXT: sub a5, a6, a5 -; RV32ZBB-NEXT: sub a4, a1, a4 -; RV32ZBB-NEXT: sub a6, a7, t0 -; RV32ZBB-NEXT: sltu a7, a5, t3 -; RV32ZBB-NEXT: sub a1, a5, t3 -; RV32ZBB-NEXT: sub a5, a4, t2 -; RV32ZBB-NEXT: sub a4, a6, a7 +; RV32ZBB-NEXT: sub a7, t0, a7 +; RV32ZBB-NEXT: sub a6, a5, a6 +; RV32ZBB-NEXT: sub a5, a7, t1 +; RV32ZBB-NEXT: sltu a7, a6, t3 +; RV32ZBB-NEXT: sub a1, a1, a4 +; RV32ZBB-NEXT: sub a5, a5, a7 +; RV32ZBB-NEXT: sub a6, a6, t3 +; RV32ZBB-NEXT: sub a1, a1, t2 ; RV32ZBB-NEXT: sub a2, a2, a3 ; RV32ZBB-NEXT: .LBB22_11: ; RV32ZBB-NEXT: sw a2, 0(a0) -; RV32ZBB-NEXT: sw a5, 4(a0) -; RV32ZBB-NEXT: sw a1, 8(a0) -; RV32ZBB-NEXT: sw a4, 12(a0) +; RV32ZBB-NEXT: sw a1, 4(a0) +; RV32ZBB-NEXT: sw a6, 8(a0) +; RV32ZBB-NEXT: sw a5, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_cmp_i128: @@ -2289,8 +2289,8 @@ define i64 @abd_subnsw_i64(i64 %a, i64 %b) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: sltu a4, a0, a2 ; RV32I-NEXT: sub a1, a1, a3 -; RV32I-NEXT: sub a0, a0, a2 ; RV32I-NEXT: sub a1, a1, a4 +; RV32I-NEXT: sub a0, a0, a2 ; RV32I-NEXT: srai a2, a1, 31 ; RV32I-NEXT: xor a0, a0, a2 ; RV32I-NEXT: xor a1, a1, a2 @@ -2312,8 +2312,8 @@ define i64 @abd_subnsw_i64(i64 %a, i64 %b) nounwind { ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: sltu a4, a0, a2 ; RV32ZBB-NEXT: sub a1, a1, a3 -; RV32ZBB-NEXT: sub a0, a0, a2 ; RV32ZBB-NEXT: sub a1, a1, a4 +; RV32ZBB-NEXT: sub a0, a0, a2 ; RV32ZBB-NEXT: srai a2, a1, 31 ; RV32ZBB-NEXT: xor a0, a0, a2 ; RV32ZBB-NEXT: xor a1, a1, a2 @@ -2340,8 +2340,8 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: sltu a4, a0, a2 ; RV32I-NEXT: 
sub a1, a1, a3 -; RV32I-NEXT: sub a0, a0, a2 ; RV32I-NEXT: sub a1, a1, a4 +; RV32I-NEXT: sub a0, a0, a2 ; RV32I-NEXT: srai a2, a1, 31 ; RV32I-NEXT: xor a0, a0, a2 ; RV32I-NEXT: xor a1, a1, a2 @@ -2363,8 +2363,8 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind { ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: sltu a4, a0, a2 ; RV32ZBB-NEXT: sub a1, a1, a3 -; RV32ZBB-NEXT: sub a0, a0, a2 ; RV32ZBB-NEXT: sub a1, a1, a4 +; RV32ZBB-NEXT: sub a0, a0, a2 ; RV32ZBB-NEXT: srai a2, a1, 31 ; RV32ZBB-NEXT: xor a0, a0, a2 ; RV32ZBB-NEXT: xor a1, a1, a2 @@ -2389,63 +2389,63 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind { define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_subnsw_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a4, 4(a2) -; RV32I-NEXT: lw a5, 8(a2) -; RV32I-NEXT: lw a6, 12(a2) -; RV32I-NEXT: lw t0, 8(a1) -; RV32I-NEXT: lw t1, 12(a1) -; RV32I-NEXT: lw a2, 0(a1) -; RV32I-NEXT: lw a7, 4(a1) -; RV32I-NEXT: sltu a1, t0, a5 -; RV32I-NEXT: sub t1, t1, a6 -; RV32I-NEXT: sltu a6, a2, a3 -; RV32I-NEXT: sub a1, t1, a1 -; RV32I-NEXT: mv t1, a6 -; RV32I-NEXT: beq a7, a4, .LBB31_2 +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a5, 8(a1) +; RV32I-NEXT: lw t0, 12(a1) +; RV32I-NEXT: lw a7, 8(a2) +; RV32I-NEXT: lw a4, 0(a2) +; RV32I-NEXT: lw t2, 12(a2) +; RV32I-NEXT: lw a6, 4(a2) +; RV32I-NEXT: lw t1, 4(a1) +; RV32I-NEXT: sltu a1, a5, a7 +; RV32I-NEXT: sub t0, t0, t2 +; RV32I-NEXT: sltu a2, a3, a4 +; RV32I-NEXT: sub a1, t0, a1 +; RV32I-NEXT: mv t0, a2 +; RV32I-NEXT: beq t1, a6, .LBB31_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a7, a4 +; RV32I-NEXT: sltu t0, t1, a6 ; RV32I-NEXT: .LBB31_2: -; RV32I-NEXT: sub a5, t0, a5 -; RV32I-NEXT: sub a4, a7, a4 -; RV32I-NEXT: sub a3, a2, a3 -; RV32I-NEXT: sltu a2, a5, t1 -; RV32I-NEXT: sub t0, a4, a6 -; RV32I-NEXT: sub a4, a5, t1 -; RV32I-NEXT: sub a5, a1, a2 -; RV32I-NEXT: srai a1, a5, 31 +; RV32I-NEXT: sub a5, a5, a7 +; RV32I-NEXT: sub a6, t1, a6 +; RV32I-NEXT: sub a4, a3, a4 +; 
RV32I-NEXT: sltu a3, a5, t0 +; RV32I-NEXT: sub t1, a6, a2 +; RV32I-NEXT: sub a2, a1, a3 +; RV32I-NEXT: sub a3, a5, t0 +; RV32I-NEXT: srai a1, a2, 31 +; RV32I-NEXT: xor a3, a3, a1 +; RV32I-NEXT: xor a5, a2, a1 ; RV32I-NEXT: xor a2, a4, a1 -; RV32I-NEXT: xor a5, a5, a1 -; RV32I-NEXT: xor a4, a3, a1 -; RV32I-NEXT: sltu a3, a1, a2 +; RV32I-NEXT: sltu a4, a1, a3 ; RV32I-NEXT: sub a6, a1, a5 -; RV32I-NEXT: sltu a5, a1, a4 -; RV32I-NEXT: sub a3, a6, a3 -; RV32I-NEXT: xor a7, t0, a1 -; RV32I-NEXT: mv a6, a5 -; RV32I-NEXT: beqz t0, .LBB31_4 +; RV32I-NEXT: sltu a5, a1, a2 +; RV32I-NEXT: sub a4, a6, a4 +; RV32I-NEXT: xor a6, t1, a1 +; RV32I-NEXT: mv a7, a5 +; RV32I-NEXT: beqz t1, .LBB31_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu a6, a1, a7 +; RV32I-NEXT: sltu a7, a1, a6 ; RV32I-NEXT: .LBB31_4: -; RV32I-NEXT: sub a2, a1, a2 -; RV32I-NEXT: sub a7, a1, a7 -; RV32I-NEXT: sub a1, a1, a4 -; RV32I-NEXT: sltu a4, a2, a6 -; RV32I-NEXT: sub a2, a2, a6 -; RV32I-NEXT: sub a5, a7, a5 -; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: sub a3, a1, a3 +; RV32I-NEXT: sub a6, a1, a6 +; RV32I-NEXT: sltu t0, a3, a7 +; RV32I-NEXT: sub a3, a3, a7 +; RV32I-NEXT: sub a5, a6, a5 +; RV32I-NEXT: sub a4, a4, t0 +; RV32I-NEXT: sub a1, a1, a2 ; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a5, 4(a0) -; RV32I-NEXT: sw a2, 8(a0) -; RV32I-NEXT: sw a3, 12(a0) +; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: sw a4, 12(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_subnsw_i128: ; RV64I: # %bb.0: ; RV64I-NEXT: sltu a4, a0, a2 ; RV64I-NEXT: sub a1, a1, a3 -; RV64I-NEXT: sub a0, a0, a2 ; RV64I-NEXT: sub a1, a1, a4 +; RV64I-NEXT: sub a0, a0, a2 ; RV64I-NEXT: srai a2, a1, 63 ; RV64I-NEXT: xor a0, a0, a2 ; RV64I-NEXT: xor a1, a1, a2 @@ -2457,63 +2457,63 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_subnsw_i128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a4, 4(a2) -; RV32ZBB-NEXT: lw a5, 8(a2) -; RV32ZBB-NEXT: lw a6, 12(a2) -; RV32ZBB-NEXT: lw t0, 8(a1) -; RV32ZBB-NEXT: 
lw t1, 12(a1) -; RV32ZBB-NEXT: lw a2, 0(a1) -; RV32ZBB-NEXT: lw a7, 4(a1) -; RV32ZBB-NEXT: sltu a1, t0, a5 -; RV32ZBB-NEXT: sub t1, t1, a6 -; RV32ZBB-NEXT: sltu a6, a2, a3 -; RV32ZBB-NEXT: sub a1, t1, a1 -; RV32ZBB-NEXT: mv t1, a6 -; RV32ZBB-NEXT: beq a7, a4, .LBB31_2 +; RV32ZBB-NEXT: lw a3, 0(a1) +; RV32ZBB-NEXT: lw a5, 8(a1) +; RV32ZBB-NEXT: lw t0, 12(a1) +; RV32ZBB-NEXT: lw a7, 8(a2) +; RV32ZBB-NEXT: lw a4, 0(a2) +; RV32ZBB-NEXT: lw t2, 12(a2) +; RV32ZBB-NEXT: lw a6, 4(a2) +; RV32ZBB-NEXT: lw t1, 4(a1) +; RV32ZBB-NEXT: sltu a1, a5, a7 +; RV32ZBB-NEXT: sub t0, t0, t2 +; RV32ZBB-NEXT: sltu a2, a3, a4 +; RV32ZBB-NEXT: sub a1, t0, a1 +; RV32ZBB-NEXT: mv t0, a2 +; RV32ZBB-NEXT: beq t1, a6, .LBB31_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a7, a4 +; RV32ZBB-NEXT: sltu t0, t1, a6 ; RV32ZBB-NEXT: .LBB31_2: -; RV32ZBB-NEXT: sub a5, t0, a5 -; RV32ZBB-NEXT: sub a4, a7, a4 -; RV32ZBB-NEXT: sub a3, a2, a3 -; RV32ZBB-NEXT: sltu a2, a5, t1 -; RV32ZBB-NEXT: sub t0, a4, a6 -; RV32ZBB-NEXT: sub a4, a5, t1 -; RV32ZBB-NEXT: sub a5, a1, a2 -; RV32ZBB-NEXT: srai a1, a5, 31 +; RV32ZBB-NEXT: sub a5, a5, a7 +; RV32ZBB-NEXT: sub a6, t1, a6 +; RV32ZBB-NEXT: sub a4, a3, a4 +; RV32ZBB-NEXT: sltu a3, a5, t0 +; RV32ZBB-NEXT: sub t1, a6, a2 +; RV32ZBB-NEXT: sub a2, a1, a3 +; RV32ZBB-NEXT: sub a3, a5, t0 +; RV32ZBB-NEXT: srai a1, a2, 31 +; RV32ZBB-NEXT: xor a3, a3, a1 +; RV32ZBB-NEXT: xor a5, a2, a1 ; RV32ZBB-NEXT: xor a2, a4, a1 -; RV32ZBB-NEXT: xor a5, a5, a1 -; RV32ZBB-NEXT: xor a4, a3, a1 -; RV32ZBB-NEXT: sltu a3, a1, a2 +; RV32ZBB-NEXT: sltu a4, a1, a3 ; RV32ZBB-NEXT: sub a6, a1, a5 -; RV32ZBB-NEXT: sltu a5, a1, a4 -; RV32ZBB-NEXT: sub a3, a6, a3 -; RV32ZBB-NEXT: xor a7, t0, a1 -; RV32ZBB-NEXT: mv a6, a5 -; RV32ZBB-NEXT: beqz t0, .LBB31_4 +; RV32ZBB-NEXT: sltu a5, a1, a2 +; RV32ZBB-NEXT: sub a4, a6, a4 +; RV32ZBB-NEXT: xor a6, t1, a1 +; RV32ZBB-NEXT: mv a7, a5 +; RV32ZBB-NEXT: beqz t1, .LBB31_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu a6, a1, a7 +; RV32ZBB-NEXT: sltu a7, 
a1, a6 ; RV32ZBB-NEXT: .LBB31_4: -; RV32ZBB-NEXT: sub a2, a1, a2 -; RV32ZBB-NEXT: sub a7, a1, a7 -; RV32ZBB-NEXT: sub a1, a1, a4 -; RV32ZBB-NEXT: sltu a4, a2, a6 -; RV32ZBB-NEXT: sub a2, a2, a6 -; RV32ZBB-NEXT: sub a5, a7, a5 -; RV32ZBB-NEXT: sub a3, a3, a4 +; RV32ZBB-NEXT: sub a3, a1, a3 +; RV32ZBB-NEXT: sub a6, a1, a6 +; RV32ZBB-NEXT: sltu t0, a3, a7 +; RV32ZBB-NEXT: sub a3, a3, a7 +; RV32ZBB-NEXT: sub a5, a6, a5 +; RV32ZBB-NEXT: sub a4, a4, t0 +; RV32ZBB-NEXT: sub a1, a1, a2 ; RV32ZBB-NEXT: sw a1, 0(a0) ; RV32ZBB-NEXT: sw a5, 4(a0) -; RV32ZBB-NEXT: sw a2, 8(a0) -; RV32ZBB-NEXT: sw a3, 12(a0) +; RV32ZBB-NEXT: sw a3, 8(a0) +; RV32ZBB-NEXT: sw a4, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_subnsw_i128: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: sltu a4, a0, a2 ; RV64ZBB-NEXT: sub a1, a1, a3 -; RV64ZBB-NEXT: sub a0, a0, a2 ; RV64ZBB-NEXT: sub a1, a1, a4 +; RV64ZBB-NEXT: sub a0, a0, a2 ; RV64ZBB-NEXT: srai a2, a1, 63 ; RV64ZBB-NEXT: xor a0, a0, a2 ; RV64ZBB-NEXT: xor a1, a1, a2 @@ -2531,63 +2531,63 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_subnsw_i128_undef: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a4, 4(a2) -; RV32I-NEXT: lw a5, 8(a2) -; RV32I-NEXT: lw a6, 12(a2) -; RV32I-NEXT: lw t0, 8(a1) -; RV32I-NEXT: lw t1, 12(a1) -; RV32I-NEXT: lw a2, 0(a1) -; RV32I-NEXT: lw a7, 4(a1) -; RV32I-NEXT: sltu a1, t0, a5 -; RV32I-NEXT: sub t1, t1, a6 -; RV32I-NEXT: sltu a6, a2, a3 -; RV32I-NEXT: sub a1, t1, a1 -; RV32I-NEXT: mv t1, a6 -; RV32I-NEXT: beq a7, a4, .LBB32_2 +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a5, 8(a1) +; RV32I-NEXT: lw t0, 12(a1) +; RV32I-NEXT: lw a7, 8(a2) +; RV32I-NEXT: lw a4, 0(a2) +; RV32I-NEXT: lw t2, 12(a2) +; RV32I-NEXT: lw a6, 4(a2) +; RV32I-NEXT: lw t1, 4(a1) +; RV32I-NEXT: sltu a1, a5, a7 +; RV32I-NEXT: sub t0, t0, t2 +; RV32I-NEXT: sltu a2, a3, a4 +; RV32I-NEXT: sub a1, t0, a1 +; RV32I-NEXT: mv t0, a2 +; RV32I-NEXT: beq t1, 
a6, .LBB32_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a7, a4 +; RV32I-NEXT: sltu t0, t1, a6 ; RV32I-NEXT: .LBB32_2: -; RV32I-NEXT: sub a5, t0, a5 -; RV32I-NEXT: sub a4, a7, a4 -; RV32I-NEXT: sub a3, a2, a3 -; RV32I-NEXT: sltu a2, a5, t1 -; RV32I-NEXT: sub t0, a4, a6 -; RV32I-NEXT: sub a4, a5, t1 -; RV32I-NEXT: sub a5, a1, a2 -; RV32I-NEXT: srai a1, a5, 31 +; RV32I-NEXT: sub a5, a5, a7 +; RV32I-NEXT: sub a6, t1, a6 +; RV32I-NEXT: sub a4, a3, a4 +; RV32I-NEXT: sltu a3, a5, t0 +; RV32I-NEXT: sub t1, a6, a2 +; RV32I-NEXT: sub a2, a1, a3 +; RV32I-NEXT: sub a3, a5, t0 +; RV32I-NEXT: srai a1, a2, 31 +; RV32I-NEXT: xor a3, a3, a1 +; RV32I-NEXT: xor a5, a2, a1 ; RV32I-NEXT: xor a2, a4, a1 -; RV32I-NEXT: xor a5, a5, a1 -; RV32I-NEXT: xor a4, a3, a1 -; RV32I-NEXT: sltu a3, a1, a2 +; RV32I-NEXT: sltu a4, a1, a3 ; RV32I-NEXT: sub a6, a1, a5 -; RV32I-NEXT: sltu a5, a1, a4 -; RV32I-NEXT: sub a3, a6, a3 -; RV32I-NEXT: xor a7, t0, a1 -; RV32I-NEXT: mv a6, a5 -; RV32I-NEXT: beqz t0, .LBB32_4 +; RV32I-NEXT: sltu a5, a1, a2 +; RV32I-NEXT: sub a4, a6, a4 +; RV32I-NEXT: xor a6, t1, a1 +; RV32I-NEXT: mv a7, a5 +; RV32I-NEXT: beqz t1, .LBB32_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu a6, a1, a7 +; RV32I-NEXT: sltu a7, a1, a6 ; RV32I-NEXT: .LBB32_4: -; RV32I-NEXT: sub a2, a1, a2 -; RV32I-NEXT: sub a7, a1, a7 -; RV32I-NEXT: sub a1, a1, a4 -; RV32I-NEXT: sltu a4, a2, a6 -; RV32I-NEXT: sub a2, a2, a6 -; RV32I-NEXT: sub a5, a7, a5 -; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: sub a3, a1, a3 +; RV32I-NEXT: sub a6, a1, a6 +; RV32I-NEXT: sltu t0, a3, a7 +; RV32I-NEXT: sub a3, a3, a7 +; RV32I-NEXT: sub a5, a6, a5 +; RV32I-NEXT: sub a4, a4, t0 +; RV32I-NEXT: sub a1, a1, a2 ; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a5, 4(a0) -; RV32I-NEXT: sw a2, 8(a0) -; RV32I-NEXT: sw a3, 12(a0) +; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: sw a4, 12(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_subnsw_i128_undef: ; RV64I: # %bb.0: ; RV64I-NEXT: sltu a4, a0, a2 ; RV64I-NEXT: sub a1, a1, a3 -; RV64I-NEXT: sub 
a0, a0, a2 ; RV64I-NEXT: sub a1, a1, a4 +; RV64I-NEXT: sub a0, a0, a2 ; RV64I-NEXT: srai a2, a1, 63 ; RV64I-NEXT: xor a0, a0, a2 ; RV64I-NEXT: xor a1, a1, a2 @@ -2599,63 +2599,63 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_subnsw_i128_undef: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a4, 4(a2) -; RV32ZBB-NEXT: lw a5, 8(a2) -; RV32ZBB-NEXT: lw a6, 12(a2) -; RV32ZBB-NEXT: lw t0, 8(a1) -; RV32ZBB-NEXT: lw t1, 12(a1) -; RV32ZBB-NEXT: lw a2, 0(a1) -; RV32ZBB-NEXT: lw a7, 4(a1) -; RV32ZBB-NEXT: sltu a1, t0, a5 -; RV32ZBB-NEXT: sub t1, t1, a6 -; RV32ZBB-NEXT: sltu a6, a2, a3 -; RV32ZBB-NEXT: sub a1, t1, a1 -; RV32ZBB-NEXT: mv t1, a6 -; RV32ZBB-NEXT: beq a7, a4, .LBB32_2 +; RV32ZBB-NEXT: lw a3, 0(a1) +; RV32ZBB-NEXT: lw a5, 8(a1) +; RV32ZBB-NEXT: lw t0, 12(a1) +; RV32ZBB-NEXT: lw a7, 8(a2) +; RV32ZBB-NEXT: lw a4, 0(a2) +; RV32ZBB-NEXT: lw t2, 12(a2) +; RV32ZBB-NEXT: lw a6, 4(a2) +; RV32ZBB-NEXT: lw t1, 4(a1) +; RV32ZBB-NEXT: sltu a1, a5, a7 +; RV32ZBB-NEXT: sub t0, t0, t2 +; RV32ZBB-NEXT: sltu a2, a3, a4 +; RV32ZBB-NEXT: sub a1, t0, a1 +; RV32ZBB-NEXT: mv t0, a2 +; RV32ZBB-NEXT: beq t1, a6, .LBB32_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a7, a4 +; RV32ZBB-NEXT: sltu t0, t1, a6 ; RV32ZBB-NEXT: .LBB32_2: -; RV32ZBB-NEXT: sub a5, t0, a5 -; RV32ZBB-NEXT: sub a4, a7, a4 -; RV32ZBB-NEXT: sub a3, a2, a3 -; RV32ZBB-NEXT: sltu a2, a5, t1 -; RV32ZBB-NEXT: sub t0, a4, a6 -; RV32ZBB-NEXT: sub a4, a5, t1 -; RV32ZBB-NEXT: sub a5, a1, a2 -; RV32ZBB-NEXT: srai a1, a5, 31 +; RV32ZBB-NEXT: sub a5, a5, a7 +; RV32ZBB-NEXT: sub a6, t1, a6 +; RV32ZBB-NEXT: sub a4, a3, a4 +; RV32ZBB-NEXT: sltu a3, a5, t0 +; RV32ZBB-NEXT: sub t1, a6, a2 +; RV32ZBB-NEXT: sub a2, a1, a3 +; RV32ZBB-NEXT: sub a3, a5, t0 +; RV32ZBB-NEXT: srai a1, a2, 31 +; RV32ZBB-NEXT: xor a3, a3, a1 +; RV32ZBB-NEXT: xor a5, a2, a1 ; RV32ZBB-NEXT: xor a2, a4, a1 -; RV32ZBB-NEXT: xor a5, a5, a1 -; RV32ZBB-NEXT: xor a4, a3, a1 -; RV32ZBB-NEXT: sltu a3, 
a1, a2 +; RV32ZBB-NEXT: sltu a4, a1, a3 ; RV32ZBB-NEXT: sub a6, a1, a5 -; RV32ZBB-NEXT: sltu a5, a1, a4 -; RV32ZBB-NEXT: sub a3, a6, a3 -; RV32ZBB-NEXT: xor a7, t0, a1 -; RV32ZBB-NEXT: mv a6, a5 -; RV32ZBB-NEXT: beqz t0, .LBB32_4 +; RV32ZBB-NEXT: sltu a5, a1, a2 +; RV32ZBB-NEXT: sub a4, a6, a4 +; RV32ZBB-NEXT: xor a6, t1, a1 +; RV32ZBB-NEXT: mv a7, a5 +; RV32ZBB-NEXT: beqz t1, .LBB32_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu a6, a1, a7 +; RV32ZBB-NEXT: sltu a7, a1, a6 ; RV32ZBB-NEXT: .LBB32_4: -; RV32ZBB-NEXT: sub a2, a1, a2 -; RV32ZBB-NEXT: sub a7, a1, a7 -; RV32ZBB-NEXT: sub a1, a1, a4 -; RV32ZBB-NEXT: sltu a4, a2, a6 -; RV32ZBB-NEXT: sub a2, a2, a6 -; RV32ZBB-NEXT: sub a5, a7, a5 -; RV32ZBB-NEXT: sub a3, a3, a4 +; RV32ZBB-NEXT: sub a3, a1, a3 +; RV32ZBB-NEXT: sub a6, a1, a6 +; RV32ZBB-NEXT: sltu t0, a3, a7 +; RV32ZBB-NEXT: sub a3, a3, a7 +; RV32ZBB-NEXT: sub a5, a6, a5 +; RV32ZBB-NEXT: sub a4, a4, t0 +; RV32ZBB-NEXT: sub a1, a1, a2 ; RV32ZBB-NEXT: sw a1, 0(a0) ; RV32ZBB-NEXT: sw a5, 4(a0) -; RV32ZBB-NEXT: sw a2, 8(a0) -; RV32ZBB-NEXT: sw a3, 12(a0) +; RV32ZBB-NEXT: sw a3, 8(a0) +; RV32ZBB-NEXT: sw a4, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_subnsw_i128_undef: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: sltu a4, a0, a2 ; RV64ZBB-NEXT: sub a1, a1, a3 -; RV64ZBB-NEXT: sub a0, a0, a2 ; RV64ZBB-NEXT: sub a1, a1, a4 +; RV64ZBB-NEXT: sub a0, a0, a2 ; RV64ZBB-NEXT: srai a2, a1, 63 ; RV64ZBB-NEXT: xor a0, a0, a2 ; RV64ZBB-NEXT: xor a1, a1, a2 diff --git a/llvm/test/CodeGen/RISCV/abds.ll b/llvm/test/CodeGen/RISCV/abds.ll index f11a9c854c465..0dfa26b3c74c0 100644 --- a/llvm/test/CodeGen/RISCV/abds.ll +++ b/llvm/test/CodeGen/RISCV/abds.ll @@ -187,8 +187,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { ; ; RV64I-LABEL: abd_ext_i16_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a1, a1 ; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: sext.w a1, a1 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: srai a1, a0, 63 @@ -320,8 +320,8 @@ define 
i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { ; ; RV64I-LABEL: abd_ext_i32_i16: ; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: slli a1, a1, 48 +; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: srai a1, a1, 48 ; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: srai a1, a0, 63 @@ -534,74 +534,74 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_ext_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw a4, 4(a1) -; RV32I-NEXT: lw a5, 8(a1) -; RV32I-NEXT: lw a7, 12(a1) -; RV32I-NEXT: lw a1, 0(a2) ; RV32I-NEXT: lw a6, 8(a2) +; RV32I-NEXT: lw a4, 0(a1) +; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 12(a1) ; RV32I-NEXT: lw t1, 12(a2) -; RV32I-NEXT: lw a2, 4(a2) -; RV32I-NEXT: sltu t0, a6, a5 -; RV32I-NEXT: mv t4, t0 -; RV32I-NEXT: beq a7, t1, .LBB11_2 +; RV32I-NEXT: lw a5, 0(a2) +; RV32I-NEXT: lw a1, 4(a2) +; RV32I-NEXT: sltu a2, a6, a7 +; RV32I-NEXT: mv t4, a2 +; RV32I-NEXT: beq t0, t1, .LBB11_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt t4, t1, a7 +; RV32I-NEXT: slt t4, t1, t0 ; RV32I-NEXT: .LBB11_2: -; RV32I-NEXT: sltu t2, a1, a3 -; RV32I-NEXT: sltu t5, a2, a4 +; RV32I-NEXT: sltu t2, a5, a4 +; RV32I-NEXT: sltu t5, a1, a3 ; RV32I-NEXT: mv t3, t2 -; RV32I-NEXT: beq a4, a2, .LBB11_4 +; RV32I-NEXT: beq a3, a1, .LBB11_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: mv t3, t5 ; RV32I-NEXT: .LBB11_4: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: xor t6, a7, t1 -; RV32I-NEXT: xor s0, a5, a6 +; RV32I-NEXT: xor t6, t0, t1 +; RV32I-NEXT: xor s0, a7, a6 ; RV32I-NEXT: or t6, s0, t6 ; RV32I-NEXT: beqz t6, .LBB11_6 ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: mv t3, t4 ; RV32I-NEXT: .LBB11_6: ; RV32I-NEXT: mv t4, t2 -; RV32I-NEXT: beq a2, a4, .LBB11_8 +; RV32I-NEXT: beq a1, a3, .LBB11_8 ; RV32I-NEXT: # %bb.7: ; RV32I-NEXT: mv t4, t5 ; RV32I-NEXT: .LBB11_8: -; RV32I-NEXT: sltu t5, a3, a1 +; RV32I-NEXT: sltu t5, a4, a5 ; 
RV32I-NEXT: mv t6, t5 -; RV32I-NEXT: beq a4, a2, .LBB11_10 +; RV32I-NEXT: beq a3, a1, .LBB11_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: sltu t6, a4, a2 +; RV32I-NEXT: sltu t6, a3, a1 ; RV32I-NEXT: .LBB11_10: ; RV32I-NEXT: bnez t3, .LBB11_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sub a7, t1, a7 -; RV32I-NEXT: sub a5, a6, a5 +; RV32I-NEXT: sub t0, t1, t0 +; RV32I-NEXT: sub a6, a6, a7 +; RV32I-NEXT: sub a2, t0, a2 +; RV32I-NEXT: sltu a7, a6, t4 +; RV32I-NEXT: sub a4, a5, a4 +; RV32I-NEXT: sub a2, a2, a7 ; RV32I-NEXT: sub a1, a1, a3 -; RV32I-NEXT: sub a2, a2, a4 -; RV32I-NEXT: sub a4, a7, t0 -; RV32I-NEXT: sltu a6, a5, t4 -; RV32I-NEXT: sub a3, a2, t2 -; RV32I-NEXT: sub a2, a4, a6 -; RV32I-NEXT: sub a4, a5, t4 +; RV32I-NEXT: sub a1, a1, t2 +; RV32I-NEXT: sub a3, a6, t4 ; RV32I-NEXT: j .LBB11_13 ; RV32I-NEXT: .LBB11_12: -; RV32I-NEXT: sltu t0, a5, a6 -; RV32I-NEXT: sub a7, a7, t1 -; RV32I-NEXT: sub a5, a5, a6 -; RV32I-NEXT: sub a1, a3, a1 -; RV32I-NEXT: sub a4, a4, a2 -; RV32I-NEXT: sub a2, a7, t0 -; RV32I-NEXT: sltu a6, a5, t6 -; RV32I-NEXT: sub a3, a4, t5 -; RV32I-NEXT: sub a2, a2, a6 -; RV32I-NEXT: sub a4, a5, t6 +; RV32I-NEXT: sltu a2, a7, a6 +; RV32I-NEXT: sub t0, t0, t1 +; RV32I-NEXT: sub a6, a7, a6 +; RV32I-NEXT: sub a2, t0, a2 +; RV32I-NEXT: sltu a7, a6, t6 +; RV32I-NEXT: sub a2, a2, a7 +; RV32I-NEXT: sub a4, a4, a5 +; RV32I-NEXT: sub a3, a3, a1 +; RV32I-NEXT: sub a1, a3, t5 +; RV32I-NEXT: sub a3, a6, t6 ; RV32I-NEXT: .LBB11_13: -; RV32I-NEXT: sw a1, 0(a0) -; RV32I-NEXT: sw a3, 4(a0) -; RV32I-NEXT: sw a4, 8(a0) +; RV32I-NEXT: sw a4, 0(a0) +; RV32I-NEXT: sw a1, 4(a0) +; RV32I-NEXT: sw a3, 8(a0) ; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 @@ -630,74 +630,74 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_ext_i128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 0(a1) -; RV32ZBB-NEXT: lw a4, 4(a1) -; RV32ZBB-NEXT: lw a5, 8(a1) -; RV32ZBB-NEXT: lw a7, 12(a1) -; RV32ZBB-NEXT: 
lw a1, 0(a2) ; RV32ZBB-NEXT: lw a6, 8(a2) +; RV32ZBB-NEXT: lw a4, 0(a1) +; RV32ZBB-NEXT: lw a3, 4(a1) +; RV32ZBB-NEXT: lw a7, 8(a1) +; RV32ZBB-NEXT: lw t0, 12(a1) ; RV32ZBB-NEXT: lw t1, 12(a2) -; RV32ZBB-NEXT: lw a2, 4(a2) -; RV32ZBB-NEXT: sltu t0, a6, a5 -; RV32ZBB-NEXT: mv t4, t0 -; RV32ZBB-NEXT: beq a7, t1, .LBB11_2 +; RV32ZBB-NEXT: lw a5, 0(a2) +; RV32ZBB-NEXT: lw a1, 4(a2) +; RV32ZBB-NEXT: sltu a2, a6, a7 +; RV32ZBB-NEXT: mv t4, a2 +; RV32ZBB-NEXT: beq t0, t1, .LBB11_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: slt t4, t1, a7 +; RV32ZBB-NEXT: slt t4, t1, t0 ; RV32ZBB-NEXT: .LBB11_2: -; RV32ZBB-NEXT: sltu t2, a1, a3 -; RV32ZBB-NEXT: sltu t5, a2, a4 +; RV32ZBB-NEXT: sltu t2, a5, a4 +; RV32ZBB-NEXT: sltu t5, a1, a3 ; RV32ZBB-NEXT: mv t3, t2 -; RV32ZBB-NEXT: beq a4, a2, .LBB11_4 +; RV32ZBB-NEXT: beq a3, a1, .LBB11_4 ; RV32ZBB-NEXT: # %bb.3: ; RV32ZBB-NEXT: mv t3, t5 ; RV32ZBB-NEXT: .LBB11_4: ; RV32ZBB-NEXT: addi sp, sp, -16 ; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32ZBB-NEXT: xor t6, a7, t1 -; RV32ZBB-NEXT: xor s0, a5, a6 +; RV32ZBB-NEXT: xor t6, t0, t1 +; RV32ZBB-NEXT: xor s0, a7, a6 ; RV32ZBB-NEXT: or t6, s0, t6 ; RV32ZBB-NEXT: beqz t6, .LBB11_6 ; RV32ZBB-NEXT: # %bb.5: ; RV32ZBB-NEXT: mv t3, t4 ; RV32ZBB-NEXT: .LBB11_6: ; RV32ZBB-NEXT: mv t4, t2 -; RV32ZBB-NEXT: beq a2, a4, .LBB11_8 +; RV32ZBB-NEXT: beq a1, a3, .LBB11_8 ; RV32ZBB-NEXT: # %bb.7: ; RV32ZBB-NEXT: mv t4, t5 ; RV32ZBB-NEXT: .LBB11_8: -; RV32ZBB-NEXT: sltu t5, a3, a1 +; RV32ZBB-NEXT: sltu t5, a4, a5 ; RV32ZBB-NEXT: mv t6, t5 -; RV32ZBB-NEXT: beq a4, a2, .LBB11_10 +; RV32ZBB-NEXT: beq a3, a1, .LBB11_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: sltu t6, a4, a2 +; RV32ZBB-NEXT: sltu t6, a3, a1 ; RV32ZBB-NEXT: .LBB11_10: ; RV32ZBB-NEXT: bnez t3, .LBB11_12 ; RV32ZBB-NEXT: # %bb.11: -; RV32ZBB-NEXT: sub a7, t1, a7 -; RV32ZBB-NEXT: sub a5, a6, a5 +; RV32ZBB-NEXT: sub t0, t1, t0 +; RV32ZBB-NEXT: sub a6, a6, a7 +; RV32ZBB-NEXT: sub a2, t0, a2 +; RV32ZBB-NEXT: sltu a7, a6, t4 +; RV32ZBB-NEXT: 
sub a4, a5, a4 +; RV32ZBB-NEXT: sub a2, a2, a7 ; RV32ZBB-NEXT: sub a1, a1, a3 -; RV32ZBB-NEXT: sub a2, a2, a4 -; RV32ZBB-NEXT: sub a4, a7, t0 -; RV32ZBB-NEXT: sltu a6, a5, t4 -; RV32ZBB-NEXT: sub a3, a2, t2 -; RV32ZBB-NEXT: sub a2, a4, a6 -; RV32ZBB-NEXT: sub a4, a5, t4 +; RV32ZBB-NEXT: sub a1, a1, t2 +; RV32ZBB-NEXT: sub a3, a6, t4 ; RV32ZBB-NEXT: j .LBB11_13 ; RV32ZBB-NEXT: .LBB11_12: -; RV32ZBB-NEXT: sltu t0, a5, a6 -; RV32ZBB-NEXT: sub a7, a7, t1 -; RV32ZBB-NEXT: sub a5, a5, a6 -; RV32ZBB-NEXT: sub a1, a3, a1 -; RV32ZBB-NEXT: sub a4, a4, a2 -; RV32ZBB-NEXT: sub a2, a7, t0 -; RV32ZBB-NEXT: sltu a6, a5, t6 -; RV32ZBB-NEXT: sub a3, a4, t5 -; RV32ZBB-NEXT: sub a2, a2, a6 -; RV32ZBB-NEXT: sub a4, a5, t6 +; RV32ZBB-NEXT: sltu a2, a7, a6 +; RV32ZBB-NEXT: sub t0, t0, t1 +; RV32ZBB-NEXT: sub a6, a7, a6 +; RV32ZBB-NEXT: sub a2, t0, a2 +; RV32ZBB-NEXT: sltu a7, a6, t6 +; RV32ZBB-NEXT: sub a2, a2, a7 +; RV32ZBB-NEXT: sub a4, a4, a5 +; RV32ZBB-NEXT: sub a3, a3, a1 +; RV32ZBB-NEXT: sub a1, a3, t5 +; RV32ZBB-NEXT: sub a3, a6, t6 ; RV32ZBB-NEXT: .LBB11_13: -; RV32ZBB-NEXT: sw a1, 0(a0) -; RV32ZBB-NEXT: sw a3, 4(a0) -; RV32ZBB-NEXT: sw a4, 8(a0) +; RV32ZBB-NEXT: sw a4, 0(a0) +; RV32ZBB-NEXT: sw a1, 4(a0) +; RV32ZBB-NEXT: sw a3, 8(a0) ; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZBB-NEXT: addi sp, sp, 16 @@ -734,74 +734,74 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_ext_i128_undef: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw a4, 4(a1) -; RV32I-NEXT: lw a5, 8(a1) -; RV32I-NEXT: lw a7, 12(a1) -; RV32I-NEXT: lw a1, 0(a2) ; RV32I-NEXT: lw a6, 8(a2) +; RV32I-NEXT: lw a4, 0(a1) +; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 12(a1) ; RV32I-NEXT: lw t1, 12(a2) -; RV32I-NEXT: lw a2, 4(a2) -; RV32I-NEXT: sltu t0, a6, a5 -; RV32I-NEXT: mv t4, t0 -; RV32I-NEXT: beq a7, t1, .LBB12_2 +; RV32I-NEXT: lw a5, 
0(a2) +; RV32I-NEXT: lw a1, 4(a2) +; RV32I-NEXT: sltu a2, a6, a7 +; RV32I-NEXT: mv t4, a2 +; RV32I-NEXT: beq t0, t1, .LBB12_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt t4, t1, a7 +; RV32I-NEXT: slt t4, t1, t0 ; RV32I-NEXT: .LBB12_2: -; RV32I-NEXT: sltu t2, a1, a3 -; RV32I-NEXT: sltu t5, a2, a4 +; RV32I-NEXT: sltu t2, a5, a4 +; RV32I-NEXT: sltu t5, a1, a3 ; RV32I-NEXT: mv t3, t2 -; RV32I-NEXT: beq a4, a2, .LBB12_4 +; RV32I-NEXT: beq a3, a1, .LBB12_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: mv t3, t5 ; RV32I-NEXT: .LBB12_4: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: xor t6, a7, t1 -; RV32I-NEXT: xor s0, a5, a6 +; RV32I-NEXT: xor t6, t0, t1 +; RV32I-NEXT: xor s0, a7, a6 ; RV32I-NEXT: or t6, s0, t6 ; RV32I-NEXT: beqz t6, .LBB12_6 ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: mv t3, t4 ; RV32I-NEXT: .LBB12_6: ; RV32I-NEXT: mv t4, t2 -; RV32I-NEXT: beq a2, a4, .LBB12_8 +; RV32I-NEXT: beq a1, a3, .LBB12_8 ; RV32I-NEXT: # %bb.7: ; RV32I-NEXT: mv t4, t5 ; RV32I-NEXT: .LBB12_8: -; RV32I-NEXT: sltu t5, a3, a1 +; RV32I-NEXT: sltu t5, a4, a5 ; RV32I-NEXT: mv t6, t5 -; RV32I-NEXT: beq a4, a2, .LBB12_10 +; RV32I-NEXT: beq a3, a1, .LBB12_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: sltu t6, a4, a2 +; RV32I-NEXT: sltu t6, a3, a1 ; RV32I-NEXT: .LBB12_10: ; RV32I-NEXT: bnez t3, .LBB12_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sub a7, t1, a7 -; RV32I-NEXT: sub a5, a6, a5 +; RV32I-NEXT: sub t0, t1, t0 +; RV32I-NEXT: sub a6, a6, a7 +; RV32I-NEXT: sub a2, t0, a2 +; RV32I-NEXT: sltu a7, a6, t4 +; RV32I-NEXT: sub a4, a5, a4 +; RV32I-NEXT: sub a2, a2, a7 ; RV32I-NEXT: sub a1, a1, a3 -; RV32I-NEXT: sub a2, a2, a4 -; RV32I-NEXT: sub a4, a7, t0 -; RV32I-NEXT: sltu a6, a5, t4 -; RV32I-NEXT: sub a3, a2, t2 -; RV32I-NEXT: sub a2, a4, a6 -; RV32I-NEXT: sub a4, a5, t4 +; RV32I-NEXT: sub a1, a1, t2 +; RV32I-NEXT: sub a3, a6, t4 ; RV32I-NEXT: j .LBB12_13 ; RV32I-NEXT: .LBB12_12: -; RV32I-NEXT: sltu t0, a5, a6 -; RV32I-NEXT: sub a7, a7, t1 -; RV32I-NEXT: sub a5, a5, 
a6 -; RV32I-NEXT: sub a1, a3, a1 -; RV32I-NEXT: sub a4, a4, a2 -; RV32I-NEXT: sub a2, a7, t0 -; RV32I-NEXT: sltu a6, a5, t6 -; RV32I-NEXT: sub a3, a4, t5 -; RV32I-NEXT: sub a2, a2, a6 -; RV32I-NEXT: sub a4, a5, t6 +; RV32I-NEXT: sltu a2, a7, a6 +; RV32I-NEXT: sub t0, t0, t1 +; RV32I-NEXT: sub a6, a7, a6 +; RV32I-NEXT: sub a2, t0, a2 +; RV32I-NEXT: sltu a7, a6, t6 +; RV32I-NEXT: sub a2, a2, a7 +; RV32I-NEXT: sub a4, a4, a5 +; RV32I-NEXT: sub a3, a3, a1 +; RV32I-NEXT: sub a1, a3, t5 +; RV32I-NEXT: sub a3, a6, t6 ; RV32I-NEXT: .LBB12_13: -; RV32I-NEXT: sw a1, 0(a0) -; RV32I-NEXT: sw a3, 4(a0) -; RV32I-NEXT: sw a4, 8(a0) +; RV32I-NEXT: sw a4, 0(a0) +; RV32I-NEXT: sw a1, 4(a0) +; RV32I-NEXT: sw a3, 8(a0) ; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 @@ -830,74 +830,74 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_ext_i128_undef: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 0(a1) -; RV32ZBB-NEXT: lw a4, 4(a1) -; RV32ZBB-NEXT: lw a5, 8(a1) -; RV32ZBB-NEXT: lw a7, 12(a1) -; RV32ZBB-NEXT: lw a1, 0(a2) ; RV32ZBB-NEXT: lw a6, 8(a2) +; RV32ZBB-NEXT: lw a4, 0(a1) +; RV32ZBB-NEXT: lw a3, 4(a1) +; RV32ZBB-NEXT: lw a7, 8(a1) +; RV32ZBB-NEXT: lw t0, 12(a1) ; RV32ZBB-NEXT: lw t1, 12(a2) -; RV32ZBB-NEXT: lw a2, 4(a2) -; RV32ZBB-NEXT: sltu t0, a6, a5 -; RV32ZBB-NEXT: mv t4, t0 -; RV32ZBB-NEXT: beq a7, t1, .LBB12_2 +; RV32ZBB-NEXT: lw a5, 0(a2) +; RV32ZBB-NEXT: lw a1, 4(a2) +; RV32ZBB-NEXT: sltu a2, a6, a7 +; RV32ZBB-NEXT: mv t4, a2 +; RV32ZBB-NEXT: beq t0, t1, .LBB12_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: slt t4, t1, a7 +; RV32ZBB-NEXT: slt t4, t1, t0 ; RV32ZBB-NEXT: .LBB12_2: -; RV32ZBB-NEXT: sltu t2, a1, a3 -; RV32ZBB-NEXT: sltu t5, a2, a4 +; RV32ZBB-NEXT: sltu t2, a5, a4 +; RV32ZBB-NEXT: sltu t5, a1, a3 ; RV32ZBB-NEXT: mv t3, t2 -; RV32ZBB-NEXT: beq a4, a2, .LBB12_4 +; RV32ZBB-NEXT: beq a3, a1, .LBB12_4 ; RV32ZBB-NEXT: # %bb.3: ; RV32ZBB-NEXT: mv t3, t5 ; RV32ZBB-NEXT: 
.LBB12_4: ; RV32ZBB-NEXT: addi sp, sp, -16 ; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32ZBB-NEXT: xor t6, a7, t1 -; RV32ZBB-NEXT: xor s0, a5, a6 +; RV32ZBB-NEXT: xor t6, t0, t1 +; RV32ZBB-NEXT: xor s0, a7, a6 ; RV32ZBB-NEXT: or t6, s0, t6 ; RV32ZBB-NEXT: beqz t6, .LBB12_6 ; RV32ZBB-NEXT: # %bb.5: ; RV32ZBB-NEXT: mv t3, t4 ; RV32ZBB-NEXT: .LBB12_6: ; RV32ZBB-NEXT: mv t4, t2 -; RV32ZBB-NEXT: beq a2, a4, .LBB12_8 +; RV32ZBB-NEXT: beq a1, a3, .LBB12_8 ; RV32ZBB-NEXT: # %bb.7: ; RV32ZBB-NEXT: mv t4, t5 ; RV32ZBB-NEXT: .LBB12_8: -; RV32ZBB-NEXT: sltu t5, a3, a1 +; RV32ZBB-NEXT: sltu t5, a4, a5 ; RV32ZBB-NEXT: mv t6, t5 -; RV32ZBB-NEXT: beq a4, a2, .LBB12_10 +; RV32ZBB-NEXT: beq a3, a1, .LBB12_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: sltu t6, a4, a2 +; RV32ZBB-NEXT: sltu t6, a3, a1 ; RV32ZBB-NEXT: .LBB12_10: ; RV32ZBB-NEXT: bnez t3, .LBB12_12 ; RV32ZBB-NEXT: # %bb.11: -; RV32ZBB-NEXT: sub a7, t1, a7 -; RV32ZBB-NEXT: sub a5, a6, a5 +; RV32ZBB-NEXT: sub t0, t1, t0 +; RV32ZBB-NEXT: sub a6, a6, a7 +; RV32ZBB-NEXT: sub a2, t0, a2 +; RV32ZBB-NEXT: sltu a7, a6, t4 +; RV32ZBB-NEXT: sub a4, a5, a4 +; RV32ZBB-NEXT: sub a2, a2, a7 ; RV32ZBB-NEXT: sub a1, a1, a3 -; RV32ZBB-NEXT: sub a2, a2, a4 -; RV32ZBB-NEXT: sub a4, a7, t0 -; RV32ZBB-NEXT: sltu a6, a5, t4 -; RV32ZBB-NEXT: sub a3, a2, t2 -; RV32ZBB-NEXT: sub a2, a4, a6 -; RV32ZBB-NEXT: sub a4, a5, t4 +; RV32ZBB-NEXT: sub a1, a1, t2 +; RV32ZBB-NEXT: sub a3, a6, t4 ; RV32ZBB-NEXT: j .LBB12_13 ; RV32ZBB-NEXT: .LBB12_12: -; RV32ZBB-NEXT: sltu t0, a5, a6 -; RV32ZBB-NEXT: sub a7, a7, t1 -; RV32ZBB-NEXT: sub a5, a5, a6 -; RV32ZBB-NEXT: sub a1, a3, a1 -; RV32ZBB-NEXT: sub a4, a4, a2 -; RV32ZBB-NEXT: sub a2, a7, t0 -; RV32ZBB-NEXT: sltu a6, a5, t6 -; RV32ZBB-NEXT: sub a3, a4, t5 -; RV32ZBB-NEXT: sub a2, a2, a6 -; RV32ZBB-NEXT: sub a4, a5, t6 +; RV32ZBB-NEXT: sltu a2, a7, a6 +; RV32ZBB-NEXT: sub t0, t0, t1 +; RV32ZBB-NEXT: sub a6, a7, a6 +; RV32ZBB-NEXT: sub a2, t0, a2 +; RV32ZBB-NEXT: sltu a7, a6, t6 +; RV32ZBB-NEXT: sub 
a2, a2, a7 +; RV32ZBB-NEXT: sub a4, a4, a5 +; RV32ZBB-NEXT: sub a3, a3, a1 +; RV32ZBB-NEXT: sub a1, a3, t5 +; RV32ZBB-NEXT: sub a3, a6, t6 ; RV32ZBB-NEXT: .LBB12_13: -; RV32ZBB-NEXT: sw a1, 0(a0) -; RV32ZBB-NEXT: sw a3, 4(a0) -; RV32ZBB-NEXT: sw a4, 8(a0) +; RV32ZBB-NEXT: sw a4, 0(a0) +; RV32ZBB-NEXT: sw a1, 4(a0) +; RV32ZBB-NEXT: sw a3, 8(a0) ; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZBB-NEXT: addi sp, sp, 16 @@ -1123,74 +1123,74 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_minmax_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw a4, 4(a1) -; RV32I-NEXT: lw a5, 8(a1) -; RV32I-NEXT: lw a7, 12(a1) -; RV32I-NEXT: lw a1, 0(a2) ; RV32I-NEXT: lw a6, 8(a2) +; RV32I-NEXT: lw a4, 0(a1) +; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 12(a1) ; RV32I-NEXT: lw t1, 12(a2) -; RV32I-NEXT: lw a2, 4(a2) -; RV32I-NEXT: sltu t0, a6, a5 -; RV32I-NEXT: mv t4, t0 -; RV32I-NEXT: beq a7, t1, .LBB17_2 +; RV32I-NEXT: lw a5, 0(a2) +; RV32I-NEXT: lw a1, 4(a2) +; RV32I-NEXT: sltu a2, a6, a7 +; RV32I-NEXT: mv t4, a2 +; RV32I-NEXT: beq t0, t1, .LBB17_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt t4, t1, a7 +; RV32I-NEXT: slt t4, t1, t0 ; RV32I-NEXT: .LBB17_2: -; RV32I-NEXT: sltu t2, a1, a3 -; RV32I-NEXT: sltu t5, a2, a4 +; RV32I-NEXT: sltu t2, a5, a4 +; RV32I-NEXT: sltu t5, a1, a3 ; RV32I-NEXT: mv t3, t2 -; RV32I-NEXT: beq a4, a2, .LBB17_4 +; RV32I-NEXT: beq a3, a1, .LBB17_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: mv t3, t5 ; RV32I-NEXT: .LBB17_4: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: xor t6, a7, t1 -; RV32I-NEXT: xor s0, a5, a6 +; RV32I-NEXT: xor t6, t0, t1 +; RV32I-NEXT: xor s0, a7, a6 ; RV32I-NEXT: or t6, s0, t6 ; RV32I-NEXT: beqz t6, .LBB17_6 ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: mv t3, t4 ; RV32I-NEXT: .LBB17_6: ; RV32I-NEXT: mv t4, t2 -; RV32I-NEXT: beq 
a2, a4, .LBB17_8 +; RV32I-NEXT: beq a1, a3, .LBB17_8 ; RV32I-NEXT: # %bb.7: ; RV32I-NEXT: mv t4, t5 ; RV32I-NEXT: .LBB17_8: -; RV32I-NEXT: sltu t5, a3, a1 +; RV32I-NEXT: sltu t5, a4, a5 ; RV32I-NEXT: mv t6, t5 -; RV32I-NEXT: beq a4, a2, .LBB17_10 +; RV32I-NEXT: beq a3, a1, .LBB17_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: sltu t6, a4, a2 +; RV32I-NEXT: sltu t6, a3, a1 ; RV32I-NEXT: .LBB17_10: ; RV32I-NEXT: bnez t3, .LBB17_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sub a7, t1, a7 -; RV32I-NEXT: sub a5, a6, a5 +; RV32I-NEXT: sub t0, t1, t0 +; RV32I-NEXT: sub a6, a6, a7 +; RV32I-NEXT: sub a2, t0, a2 +; RV32I-NEXT: sltu a7, a6, t4 +; RV32I-NEXT: sub a4, a5, a4 +; RV32I-NEXT: sub a2, a2, a7 ; RV32I-NEXT: sub a1, a1, a3 -; RV32I-NEXT: sub a2, a2, a4 -; RV32I-NEXT: sub a4, a7, t0 -; RV32I-NEXT: sltu a6, a5, t4 -; RV32I-NEXT: sub a3, a2, t2 -; RV32I-NEXT: sub a2, a4, a6 -; RV32I-NEXT: sub a4, a5, t4 +; RV32I-NEXT: sub a1, a1, t2 +; RV32I-NEXT: sub a3, a6, t4 ; RV32I-NEXT: j .LBB17_13 ; RV32I-NEXT: .LBB17_12: -; RV32I-NEXT: sltu t0, a5, a6 -; RV32I-NEXT: sub a7, a7, t1 -; RV32I-NEXT: sub a5, a5, a6 -; RV32I-NEXT: sub a1, a3, a1 -; RV32I-NEXT: sub a4, a4, a2 -; RV32I-NEXT: sub a2, a7, t0 -; RV32I-NEXT: sltu a6, a5, t6 -; RV32I-NEXT: sub a3, a4, t5 -; RV32I-NEXT: sub a2, a2, a6 -; RV32I-NEXT: sub a4, a5, t6 +; RV32I-NEXT: sltu a2, a7, a6 +; RV32I-NEXT: sub t0, t0, t1 +; RV32I-NEXT: sub a6, a7, a6 +; RV32I-NEXT: sub a2, t0, a2 +; RV32I-NEXT: sltu a7, a6, t6 +; RV32I-NEXT: sub a2, a2, a7 +; RV32I-NEXT: sub a4, a4, a5 +; RV32I-NEXT: sub a3, a3, a1 +; RV32I-NEXT: sub a1, a3, t5 +; RV32I-NEXT: sub a3, a6, t6 ; RV32I-NEXT: .LBB17_13: -; RV32I-NEXT: sw a1, 0(a0) -; RV32I-NEXT: sw a3, 4(a0) -; RV32I-NEXT: sw a4, 8(a0) +; RV32I-NEXT: sw a4, 0(a0) +; RV32I-NEXT: sw a1, 4(a0) +; RV32I-NEXT: sw a3, 8(a0) ; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 @@ -1219,74 +1219,74 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind 
{ ; ; RV32ZBB-LABEL: abd_minmax_i128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 0(a1) -; RV32ZBB-NEXT: lw a4, 4(a1) -; RV32ZBB-NEXT: lw a5, 8(a1) -; RV32ZBB-NEXT: lw a7, 12(a1) -; RV32ZBB-NEXT: lw a1, 0(a2) ; RV32ZBB-NEXT: lw a6, 8(a2) +; RV32ZBB-NEXT: lw a4, 0(a1) +; RV32ZBB-NEXT: lw a3, 4(a1) +; RV32ZBB-NEXT: lw a7, 8(a1) +; RV32ZBB-NEXT: lw t0, 12(a1) ; RV32ZBB-NEXT: lw t1, 12(a2) -; RV32ZBB-NEXT: lw a2, 4(a2) -; RV32ZBB-NEXT: sltu t0, a6, a5 -; RV32ZBB-NEXT: mv t4, t0 -; RV32ZBB-NEXT: beq a7, t1, .LBB17_2 +; RV32ZBB-NEXT: lw a5, 0(a2) +; RV32ZBB-NEXT: lw a1, 4(a2) +; RV32ZBB-NEXT: sltu a2, a6, a7 +; RV32ZBB-NEXT: mv t4, a2 +; RV32ZBB-NEXT: beq t0, t1, .LBB17_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: slt t4, t1, a7 +; RV32ZBB-NEXT: slt t4, t1, t0 ; RV32ZBB-NEXT: .LBB17_2: -; RV32ZBB-NEXT: sltu t2, a1, a3 -; RV32ZBB-NEXT: sltu t5, a2, a4 +; RV32ZBB-NEXT: sltu t2, a5, a4 +; RV32ZBB-NEXT: sltu t5, a1, a3 ; RV32ZBB-NEXT: mv t3, t2 -; RV32ZBB-NEXT: beq a4, a2, .LBB17_4 +; RV32ZBB-NEXT: beq a3, a1, .LBB17_4 ; RV32ZBB-NEXT: # %bb.3: ; RV32ZBB-NEXT: mv t3, t5 ; RV32ZBB-NEXT: .LBB17_4: ; RV32ZBB-NEXT: addi sp, sp, -16 ; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32ZBB-NEXT: xor t6, a7, t1 -; RV32ZBB-NEXT: xor s0, a5, a6 +; RV32ZBB-NEXT: xor t6, t0, t1 +; RV32ZBB-NEXT: xor s0, a7, a6 ; RV32ZBB-NEXT: or t6, s0, t6 ; RV32ZBB-NEXT: beqz t6, .LBB17_6 ; RV32ZBB-NEXT: # %bb.5: ; RV32ZBB-NEXT: mv t3, t4 ; RV32ZBB-NEXT: .LBB17_6: ; RV32ZBB-NEXT: mv t4, t2 -; RV32ZBB-NEXT: beq a2, a4, .LBB17_8 +; RV32ZBB-NEXT: beq a1, a3, .LBB17_8 ; RV32ZBB-NEXT: # %bb.7: ; RV32ZBB-NEXT: mv t4, t5 ; RV32ZBB-NEXT: .LBB17_8: -; RV32ZBB-NEXT: sltu t5, a3, a1 +; RV32ZBB-NEXT: sltu t5, a4, a5 ; RV32ZBB-NEXT: mv t6, t5 -; RV32ZBB-NEXT: beq a4, a2, .LBB17_10 +; RV32ZBB-NEXT: beq a3, a1, .LBB17_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: sltu t6, a4, a2 +; RV32ZBB-NEXT: sltu t6, a3, a1 ; RV32ZBB-NEXT: .LBB17_10: ; RV32ZBB-NEXT: bnez t3, .LBB17_12 ; RV32ZBB-NEXT: # %bb.11: -; RV32ZBB-NEXT: 
sub a7, t1, a7 -; RV32ZBB-NEXT: sub a5, a6, a5 +; RV32ZBB-NEXT: sub t0, t1, t0 +; RV32ZBB-NEXT: sub a6, a6, a7 +; RV32ZBB-NEXT: sub a2, t0, a2 +; RV32ZBB-NEXT: sltu a7, a6, t4 +; RV32ZBB-NEXT: sub a4, a5, a4 +; RV32ZBB-NEXT: sub a2, a2, a7 ; RV32ZBB-NEXT: sub a1, a1, a3 -; RV32ZBB-NEXT: sub a2, a2, a4 -; RV32ZBB-NEXT: sub a4, a7, t0 -; RV32ZBB-NEXT: sltu a6, a5, t4 -; RV32ZBB-NEXT: sub a3, a2, t2 -; RV32ZBB-NEXT: sub a2, a4, a6 -; RV32ZBB-NEXT: sub a4, a5, t4 +; RV32ZBB-NEXT: sub a1, a1, t2 +; RV32ZBB-NEXT: sub a3, a6, t4 ; RV32ZBB-NEXT: j .LBB17_13 ; RV32ZBB-NEXT: .LBB17_12: -; RV32ZBB-NEXT: sltu t0, a5, a6 -; RV32ZBB-NEXT: sub a7, a7, t1 -; RV32ZBB-NEXT: sub a5, a5, a6 -; RV32ZBB-NEXT: sub a1, a3, a1 -; RV32ZBB-NEXT: sub a4, a4, a2 -; RV32ZBB-NEXT: sub a2, a7, t0 -; RV32ZBB-NEXT: sltu a6, a5, t6 -; RV32ZBB-NEXT: sub a3, a4, t5 -; RV32ZBB-NEXT: sub a2, a2, a6 -; RV32ZBB-NEXT: sub a4, a5, t6 +; RV32ZBB-NEXT: sltu a2, a7, a6 +; RV32ZBB-NEXT: sub t0, t0, t1 +; RV32ZBB-NEXT: sub a6, a7, a6 +; RV32ZBB-NEXT: sub a2, t0, a2 +; RV32ZBB-NEXT: sltu a7, a6, t6 +; RV32ZBB-NEXT: sub a2, a2, a7 +; RV32ZBB-NEXT: sub a4, a4, a5 +; RV32ZBB-NEXT: sub a3, a3, a1 +; RV32ZBB-NEXT: sub a1, a3, t5 +; RV32ZBB-NEXT: sub a3, a6, t6 ; RV32ZBB-NEXT: .LBB17_13: -; RV32ZBB-NEXT: sw a1, 0(a0) -; RV32ZBB-NEXT: sw a3, 4(a0) -; RV32ZBB-NEXT: sw a4, 8(a0) +; RV32ZBB-NEXT: sw a4, 0(a0) +; RV32ZBB-NEXT: sw a1, 4(a0) +; RV32ZBB-NEXT: sw a3, 8(a0) ; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZBB-NEXT: addi sp, sp, 16 @@ -1514,74 +1514,74 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_cmp_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw a4, 4(a1) -; RV32I-NEXT: lw a5, 8(a1) -; RV32I-NEXT: lw a7, 12(a1) -; RV32I-NEXT: lw a1, 0(a2) ; RV32I-NEXT: lw a6, 8(a2) +; RV32I-NEXT: lw a4, 0(a1) +; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 
12(a1) ; RV32I-NEXT: lw t1, 12(a2) -; RV32I-NEXT: lw a2, 4(a2) -; RV32I-NEXT: sltu t0, a6, a5 -; RV32I-NEXT: mv t4, t0 -; RV32I-NEXT: beq a7, t1, .LBB22_2 +; RV32I-NEXT: lw a5, 0(a2) +; RV32I-NEXT: lw a1, 4(a2) +; RV32I-NEXT: sltu a2, a6, a7 +; RV32I-NEXT: mv t4, a2 +; RV32I-NEXT: beq t0, t1, .LBB22_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt t4, t1, a7 +; RV32I-NEXT: slt t4, t1, t0 ; RV32I-NEXT: .LBB22_2: -; RV32I-NEXT: sltu t2, a1, a3 -; RV32I-NEXT: sltu t5, a2, a4 +; RV32I-NEXT: sltu t2, a5, a4 +; RV32I-NEXT: sltu t5, a1, a3 ; RV32I-NEXT: mv t3, t2 -; RV32I-NEXT: beq a4, a2, .LBB22_4 +; RV32I-NEXT: beq a3, a1, .LBB22_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: mv t3, t5 ; RV32I-NEXT: .LBB22_4: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: xor t6, a7, t1 -; RV32I-NEXT: xor s0, a5, a6 +; RV32I-NEXT: xor t6, t0, t1 +; RV32I-NEXT: xor s0, a7, a6 ; RV32I-NEXT: or t6, s0, t6 ; RV32I-NEXT: beqz t6, .LBB22_6 ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: mv t3, t4 ; RV32I-NEXT: .LBB22_6: ; RV32I-NEXT: mv t4, t2 -; RV32I-NEXT: beq a2, a4, .LBB22_8 +; RV32I-NEXT: beq a1, a3, .LBB22_8 ; RV32I-NEXT: # %bb.7: ; RV32I-NEXT: mv t4, t5 ; RV32I-NEXT: .LBB22_8: -; RV32I-NEXT: sltu t5, a3, a1 +; RV32I-NEXT: sltu t5, a4, a5 ; RV32I-NEXT: mv t6, t5 -; RV32I-NEXT: beq a4, a2, .LBB22_10 +; RV32I-NEXT: beq a3, a1, .LBB22_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: sltu t6, a4, a2 +; RV32I-NEXT: sltu t6, a3, a1 ; RV32I-NEXT: .LBB22_10: ; RV32I-NEXT: bnez t3, .LBB22_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sub a7, t1, a7 -; RV32I-NEXT: sub a5, a6, a5 +; RV32I-NEXT: sub t0, t1, t0 +; RV32I-NEXT: sub a6, a6, a7 +; RV32I-NEXT: sub a2, t0, a2 +; RV32I-NEXT: sltu a7, a6, t4 +; RV32I-NEXT: sub a4, a5, a4 +; RV32I-NEXT: sub a2, a2, a7 ; RV32I-NEXT: sub a1, a1, a3 -; RV32I-NEXT: sub a2, a2, a4 -; RV32I-NEXT: sub a4, a7, t0 -; RV32I-NEXT: sltu a6, a5, t4 -; RV32I-NEXT: sub a3, a2, t2 -; RV32I-NEXT: sub a2, a4, a6 -; RV32I-NEXT: sub a4, a5, t4 +; RV32I-NEXT: sub 
a1, a1, t2 +; RV32I-NEXT: sub a3, a6, t4 ; RV32I-NEXT: j .LBB22_13 ; RV32I-NEXT: .LBB22_12: -; RV32I-NEXT: sltu t0, a5, a6 -; RV32I-NEXT: sub a7, a7, t1 -; RV32I-NEXT: sub a5, a5, a6 -; RV32I-NEXT: sub a1, a3, a1 -; RV32I-NEXT: sub a4, a4, a2 -; RV32I-NEXT: sub a2, a7, t0 -; RV32I-NEXT: sltu a6, a5, t6 -; RV32I-NEXT: sub a3, a4, t5 -; RV32I-NEXT: sub a2, a2, a6 -; RV32I-NEXT: sub a4, a5, t6 +; RV32I-NEXT: sltu a2, a7, a6 +; RV32I-NEXT: sub t0, t0, t1 +; RV32I-NEXT: sub a6, a7, a6 +; RV32I-NEXT: sub a2, t0, a2 +; RV32I-NEXT: sltu a7, a6, t6 +; RV32I-NEXT: sub a2, a2, a7 +; RV32I-NEXT: sub a4, a4, a5 +; RV32I-NEXT: sub a3, a3, a1 +; RV32I-NEXT: sub a1, a3, t5 +; RV32I-NEXT: sub a3, a6, t6 ; RV32I-NEXT: .LBB22_13: -; RV32I-NEXT: sw a1, 0(a0) -; RV32I-NEXT: sw a3, 4(a0) -; RV32I-NEXT: sw a4, 8(a0) +; RV32I-NEXT: sw a4, 0(a0) +; RV32I-NEXT: sw a1, 4(a0) +; RV32I-NEXT: sw a3, 8(a0) ; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 @@ -1610,74 +1610,74 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_cmp_i128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 0(a1) -; RV32ZBB-NEXT: lw a4, 4(a1) -; RV32ZBB-NEXT: lw a5, 8(a1) -; RV32ZBB-NEXT: lw a7, 12(a1) -; RV32ZBB-NEXT: lw a1, 0(a2) ; RV32ZBB-NEXT: lw a6, 8(a2) +; RV32ZBB-NEXT: lw a4, 0(a1) +; RV32ZBB-NEXT: lw a3, 4(a1) +; RV32ZBB-NEXT: lw a7, 8(a1) +; RV32ZBB-NEXT: lw t0, 12(a1) ; RV32ZBB-NEXT: lw t1, 12(a2) -; RV32ZBB-NEXT: lw a2, 4(a2) -; RV32ZBB-NEXT: sltu t0, a6, a5 -; RV32ZBB-NEXT: mv t4, t0 -; RV32ZBB-NEXT: beq a7, t1, .LBB22_2 +; RV32ZBB-NEXT: lw a5, 0(a2) +; RV32ZBB-NEXT: lw a1, 4(a2) +; RV32ZBB-NEXT: sltu a2, a6, a7 +; RV32ZBB-NEXT: mv t4, a2 +; RV32ZBB-NEXT: beq t0, t1, .LBB22_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: slt t4, t1, a7 +; RV32ZBB-NEXT: slt t4, t1, t0 ; RV32ZBB-NEXT: .LBB22_2: -; RV32ZBB-NEXT: sltu t2, a1, a3 -; RV32ZBB-NEXT: sltu t5, a2, a4 +; RV32ZBB-NEXT: sltu t2, a5, a4 +; RV32ZBB-NEXT: sltu t5, a1, a3 ; 
RV32ZBB-NEXT: mv t3, t2 -; RV32ZBB-NEXT: beq a4, a2, .LBB22_4 +; RV32ZBB-NEXT: beq a3, a1, .LBB22_4 ; RV32ZBB-NEXT: # %bb.3: ; RV32ZBB-NEXT: mv t3, t5 ; RV32ZBB-NEXT: .LBB22_4: ; RV32ZBB-NEXT: addi sp, sp, -16 ; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32ZBB-NEXT: xor t6, a7, t1 -; RV32ZBB-NEXT: xor s0, a5, a6 +; RV32ZBB-NEXT: xor t6, t0, t1 +; RV32ZBB-NEXT: xor s0, a7, a6 ; RV32ZBB-NEXT: or t6, s0, t6 ; RV32ZBB-NEXT: beqz t6, .LBB22_6 ; RV32ZBB-NEXT: # %bb.5: ; RV32ZBB-NEXT: mv t3, t4 ; RV32ZBB-NEXT: .LBB22_6: ; RV32ZBB-NEXT: mv t4, t2 -; RV32ZBB-NEXT: beq a2, a4, .LBB22_8 +; RV32ZBB-NEXT: beq a1, a3, .LBB22_8 ; RV32ZBB-NEXT: # %bb.7: ; RV32ZBB-NEXT: mv t4, t5 ; RV32ZBB-NEXT: .LBB22_8: -; RV32ZBB-NEXT: sltu t5, a3, a1 +; RV32ZBB-NEXT: sltu t5, a4, a5 ; RV32ZBB-NEXT: mv t6, t5 -; RV32ZBB-NEXT: beq a4, a2, .LBB22_10 +; RV32ZBB-NEXT: beq a3, a1, .LBB22_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: sltu t6, a4, a2 +; RV32ZBB-NEXT: sltu t6, a3, a1 ; RV32ZBB-NEXT: .LBB22_10: ; RV32ZBB-NEXT: bnez t3, .LBB22_12 ; RV32ZBB-NEXT: # %bb.11: -; RV32ZBB-NEXT: sub a7, t1, a7 -; RV32ZBB-NEXT: sub a5, a6, a5 +; RV32ZBB-NEXT: sub t0, t1, t0 +; RV32ZBB-NEXT: sub a6, a6, a7 +; RV32ZBB-NEXT: sub a2, t0, a2 +; RV32ZBB-NEXT: sltu a7, a6, t4 +; RV32ZBB-NEXT: sub a4, a5, a4 +; RV32ZBB-NEXT: sub a2, a2, a7 ; RV32ZBB-NEXT: sub a1, a1, a3 -; RV32ZBB-NEXT: sub a2, a2, a4 -; RV32ZBB-NEXT: sub a4, a7, t0 -; RV32ZBB-NEXT: sltu a6, a5, t4 -; RV32ZBB-NEXT: sub a3, a2, t2 -; RV32ZBB-NEXT: sub a2, a4, a6 -; RV32ZBB-NEXT: sub a4, a5, t4 +; RV32ZBB-NEXT: sub a1, a1, t2 +; RV32ZBB-NEXT: sub a3, a6, t4 ; RV32ZBB-NEXT: j .LBB22_13 ; RV32ZBB-NEXT: .LBB22_12: -; RV32ZBB-NEXT: sltu t0, a5, a6 -; RV32ZBB-NEXT: sub a7, a7, t1 -; RV32ZBB-NEXT: sub a5, a5, a6 -; RV32ZBB-NEXT: sub a1, a3, a1 -; RV32ZBB-NEXT: sub a4, a4, a2 -; RV32ZBB-NEXT: sub a2, a7, t0 -; RV32ZBB-NEXT: sltu a6, a5, t6 -; RV32ZBB-NEXT: sub a3, a4, t5 -; RV32ZBB-NEXT: sub a2, a2, a6 -; RV32ZBB-NEXT: sub a4, a5, t6 +; RV32ZBB-NEXT: 
sltu a2, a7, a6 +; RV32ZBB-NEXT: sub t0, t0, t1 +; RV32ZBB-NEXT: sub a6, a7, a6 +; RV32ZBB-NEXT: sub a2, t0, a2 +; RV32ZBB-NEXT: sltu a7, a6, t6 +; RV32ZBB-NEXT: sub a2, a2, a7 +; RV32ZBB-NEXT: sub a4, a4, a5 +; RV32ZBB-NEXT: sub a3, a3, a1 +; RV32ZBB-NEXT: sub a1, a3, t5 +; RV32ZBB-NEXT: sub a3, a6, t6 ; RV32ZBB-NEXT: .LBB22_13: -; RV32ZBB-NEXT: sw a1, 0(a0) -; RV32ZBB-NEXT: sw a3, 4(a0) -; RV32ZBB-NEXT: sw a4, 8(a0) +; RV32ZBB-NEXT: sw a4, 0(a0) +; RV32ZBB-NEXT: sw a1, 4(a0) +; RV32ZBB-NEXT: sw a3, 8(a0) ; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZBB-NEXT: addi sp, sp, 16 @@ -1918,9 +1918,9 @@ define i64 @abd_subnsw_i64(i64 %a, i64 %b) nounwind { ; RV32I-NEXT: bgez a1, .LBB29_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: .LBB29_2: ; RV32I-NEXT: ret ; @@ -1941,9 +1941,9 @@ define i64 @abd_subnsw_i64(i64 %a, i64 %b) nounwind { ; RV32ZBB-NEXT: bgez a1, .LBB29_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: snez a2, a0 +; RV32ZBB-NEXT: neg a0, a0 ; RV32ZBB-NEXT: add a1, a1, a2 ; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: neg a0, a0 ; RV32ZBB-NEXT: .LBB29_2: ; RV32ZBB-NEXT: ret ; @@ -1968,9 +1968,9 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind { ; RV32I-NEXT: bgez a1, .LBB30_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: .LBB30_2: ; RV32I-NEXT: ret ; @@ -1991,9 +1991,9 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind { ; RV32ZBB-NEXT: bgez a1, .LBB30_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: snez a2, a0 +; RV32ZBB-NEXT: neg a0, a0 ; RV32ZBB-NEXT: add a1, a1, a2 ; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: neg a0, a0 ; RV32ZBB-NEXT: .LBB30_2: ; RV32ZBB-NEXT: ret ; @@ -2011,50 +2011,50 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind { define i128 
@abd_subnsw_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_subnsw_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a4, 4(a2) -; RV32I-NEXT: lw a5, 8(a2) -; RV32I-NEXT: lw a2, 12(a2) -; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw a4, 0(a1) +; RV32I-NEXT: lw a3, 8(a1) ; RV32I-NEXT: lw t0, 12(a1) -; RV32I-NEXT: lw a6, 0(a1) -; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu t1, a7, a5 -; RV32I-NEXT: sub t0, t0, a2 -; RV32I-NEXT: sltu a2, a6, a3 -; RV32I-NEXT: sub t0, t0, t1 -; RV32I-NEXT: mv t1, a2 -; RV32I-NEXT: beq a1, a4, .LBB31_2 +; RV32I-NEXT: lw a7, 8(a2) +; RV32I-NEXT: lw a5, 0(a2) +; RV32I-NEXT: lw t1, 12(a2) +; RV32I-NEXT: lw a2, 4(a2) +; RV32I-NEXT: lw a6, 4(a1) +; RV32I-NEXT: sltu a1, a3, a7 +; RV32I-NEXT: sub t1, t0, t1 +; RV32I-NEXT: sltu t0, a4, a5 +; RV32I-NEXT: sub a1, t1, a1 +; RV32I-NEXT: mv t1, t0 +; RV32I-NEXT: beq a6, a2, .LBB31_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a1, a4 +; RV32I-NEXT: sltu t1, a6, a2 ; RV32I-NEXT: .LBB31_2: -; RV32I-NEXT: sub a5, a7, a5 -; RV32I-NEXT: sub a1, a1, a4 -; RV32I-NEXT: sltu a4, a5, t1 -; RV32I-NEXT: sub a5, a5, t1 -; RV32I-NEXT: sub a4, t0, a4 -; RV32I-NEXT: sub a2, a1, a2 -; RV32I-NEXT: sub a1, a6, a3 -; RV32I-NEXT: bgez a4, .LBB31_4 +; RV32I-NEXT: sub a3, a3, a7 +; RV32I-NEXT: sltu a7, a3, t1 +; RV32I-NEXT: sub a1, a1, a7 +; RV32I-NEXT: sub a3, a3, t1 +; RV32I-NEXT: sub a2, a6, a2 +; RV32I-NEXT: sub a2, a2, t0 +; RV32I-NEXT: sub a4, a4, a5 +; RV32I-NEXT: bgez a1, .LBB31_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: snez a3, a2 -; RV32I-NEXT: snez a6, a1 -; RV32I-NEXT: neg a7, a5 -; RV32I-NEXT: snez a5, a5 -; RV32I-NEXT: or a3, a6, a3 -; RV32I-NEXT: add a4, a4, a5 +; RV32I-NEXT: snez a5, a2 +; RV32I-NEXT: snez a6, a4 +; RV32I-NEXT: snez a7, a3 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: neg a3, a3 +; RV32I-NEXT: add a1, a1, a7 +; RV32I-NEXT: sltu a7, a3, a5 +; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: sub a1, a1, a7 +; RV32I-NEXT: sub a3, a3, a5 ; RV32I-NEXT: add a2, a2, a6 -; RV32I-NEXT: 
sltu a6, a7, a3 -; RV32I-NEXT: neg a4, a4 -; RV32I-NEXT: sub a5, a7, a3 ; RV32I-NEXT: neg a2, a2 -; RV32I-NEXT: sub a4, a4, a6 -; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: neg a4, a4 ; RV32I-NEXT: .LBB31_4: -; RV32I-NEXT: sw a1, 0(a0) +; RV32I-NEXT: sw a4, 0(a0) ; RV32I-NEXT: sw a2, 4(a0) -; RV32I-NEXT: sw a5, 8(a0) -; RV32I-NEXT: sw a4, 12(a0) +; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: sw a1, 12(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_subnsw_i128: @@ -2066,58 +2066,58 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { ; RV64I-NEXT: bgez a1, .LBB31_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: snez a2, a0 +; RV64I-NEXT: neg a0, a0 ; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: neg a1, a1 -; RV64I-NEXT: neg a0, a0 ; RV64I-NEXT: .LBB31_2: ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: abd_subnsw_i128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a4, 4(a2) -; RV32ZBB-NEXT: lw a5, 8(a2) -; RV32ZBB-NEXT: lw a2, 12(a2) -; RV32ZBB-NEXT: lw a7, 8(a1) +; RV32ZBB-NEXT: lw a4, 0(a1) +; RV32ZBB-NEXT: lw a3, 8(a1) ; RV32ZBB-NEXT: lw t0, 12(a1) -; RV32ZBB-NEXT: lw a6, 0(a1) -; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu t1, a7, a5 -; RV32ZBB-NEXT: sub t0, t0, a2 -; RV32ZBB-NEXT: sltu a2, a6, a3 -; RV32ZBB-NEXT: sub t0, t0, t1 -; RV32ZBB-NEXT: mv t1, a2 -; RV32ZBB-NEXT: beq a1, a4, .LBB31_2 +; RV32ZBB-NEXT: lw a7, 8(a2) +; RV32ZBB-NEXT: lw a5, 0(a2) +; RV32ZBB-NEXT: lw t1, 12(a2) +; RV32ZBB-NEXT: lw a2, 4(a2) +; RV32ZBB-NEXT: lw a6, 4(a1) +; RV32ZBB-NEXT: sltu a1, a3, a7 +; RV32ZBB-NEXT: sub t1, t0, t1 +; RV32ZBB-NEXT: sltu t0, a4, a5 +; RV32ZBB-NEXT: sub a1, t1, a1 +; RV32ZBB-NEXT: mv t1, t0 +; RV32ZBB-NEXT: beq a6, a2, .LBB31_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a1, a4 +; RV32ZBB-NEXT: sltu t1, a6, a2 ; RV32ZBB-NEXT: .LBB31_2: -; RV32ZBB-NEXT: sub a5, a7, a5 -; RV32ZBB-NEXT: sub a1, a1, a4 -; RV32ZBB-NEXT: sltu a4, a5, t1 -; RV32ZBB-NEXT: sub a5, a5, t1 -; RV32ZBB-NEXT: sub a4, t0, a4 -; RV32ZBB-NEXT: sub a2, a1, a2 -; RV32ZBB-NEXT: sub a1, a6, 
a3 -; RV32ZBB-NEXT: bgez a4, .LBB31_4 +; RV32ZBB-NEXT: sub a3, a3, a7 +; RV32ZBB-NEXT: sltu a7, a3, t1 +; RV32ZBB-NEXT: sub a1, a1, a7 +; RV32ZBB-NEXT: sub a3, a3, t1 +; RV32ZBB-NEXT: sub a2, a6, a2 +; RV32ZBB-NEXT: sub a2, a2, t0 +; RV32ZBB-NEXT: sub a4, a4, a5 +; RV32ZBB-NEXT: bgez a1, .LBB31_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: snez a3, a2 -; RV32ZBB-NEXT: snez a6, a1 -; RV32ZBB-NEXT: neg a7, a5 -; RV32ZBB-NEXT: snez a5, a5 -; RV32ZBB-NEXT: or a3, a6, a3 -; RV32ZBB-NEXT: add a4, a4, a5 +; RV32ZBB-NEXT: snez a5, a2 +; RV32ZBB-NEXT: snez a6, a4 +; RV32ZBB-NEXT: snez a7, a3 +; RV32ZBB-NEXT: or a5, a6, a5 +; RV32ZBB-NEXT: neg a3, a3 +; RV32ZBB-NEXT: add a1, a1, a7 +; RV32ZBB-NEXT: sltu a7, a3, a5 +; RV32ZBB-NEXT: neg a1, a1 +; RV32ZBB-NEXT: sub a1, a1, a7 +; RV32ZBB-NEXT: sub a3, a3, a5 ; RV32ZBB-NEXT: add a2, a2, a6 -; RV32ZBB-NEXT: sltu a6, a7, a3 -; RV32ZBB-NEXT: neg a4, a4 -; RV32ZBB-NEXT: sub a5, a7, a3 ; RV32ZBB-NEXT: neg a2, a2 -; RV32ZBB-NEXT: sub a4, a4, a6 -; RV32ZBB-NEXT: neg a1, a1 +; RV32ZBB-NEXT: neg a4, a4 ; RV32ZBB-NEXT: .LBB31_4: -; RV32ZBB-NEXT: sw a1, 0(a0) +; RV32ZBB-NEXT: sw a4, 0(a0) ; RV32ZBB-NEXT: sw a2, 4(a0) -; RV32ZBB-NEXT: sw a5, 8(a0) -; RV32ZBB-NEXT: sw a4, 12(a0) +; RV32ZBB-NEXT: sw a3, 8(a0) +; RV32ZBB-NEXT: sw a1, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_subnsw_i128: @@ -2129,9 +2129,9 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { ; RV64ZBB-NEXT: bgez a1, .LBB31_2 ; RV64ZBB-NEXT: # %bb.1: ; RV64ZBB-NEXT: snez a2, a0 +; RV64ZBB-NEXT: neg a0, a0 ; RV64ZBB-NEXT: add a1, a1, a2 ; RV64ZBB-NEXT: neg a1, a1 -; RV64ZBB-NEXT: neg a0, a0 ; RV64ZBB-NEXT: .LBB31_2: ; RV64ZBB-NEXT: ret %sub = sub nsw i128 %a, %b @@ -2142,50 +2142,50 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_subnsw_i128_undef: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a4, 4(a2) -; RV32I-NEXT: lw a5, 8(a2) -; RV32I-NEXT: lw a2, 
12(a2) -; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw a4, 0(a1) +; RV32I-NEXT: lw a3, 8(a1) ; RV32I-NEXT: lw t0, 12(a1) -; RV32I-NEXT: lw a6, 0(a1) -; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu t1, a7, a5 -; RV32I-NEXT: sub t0, t0, a2 -; RV32I-NEXT: sltu a2, a6, a3 -; RV32I-NEXT: sub t0, t0, t1 -; RV32I-NEXT: mv t1, a2 -; RV32I-NEXT: beq a1, a4, .LBB32_2 +; RV32I-NEXT: lw a7, 8(a2) +; RV32I-NEXT: lw a5, 0(a2) +; RV32I-NEXT: lw t1, 12(a2) +; RV32I-NEXT: lw a2, 4(a2) +; RV32I-NEXT: lw a6, 4(a1) +; RV32I-NEXT: sltu a1, a3, a7 +; RV32I-NEXT: sub t1, t0, t1 +; RV32I-NEXT: sltu t0, a4, a5 +; RV32I-NEXT: sub a1, t1, a1 +; RV32I-NEXT: mv t1, t0 +; RV32I-NEXT: beq a6, a2, .LBB32_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a1, a4 +; RV32I-NEXT: sltu t1, a6, a2 ; RV32I-NEXT: .LBB32_2: -; RV32I-NEXT: sub a5, a7, a5 -; RV32I-NEXT: sub a1, a1, a4 -; RV32I-NEXT: sltu a4, a5, t1 -; RV32I-NEXT: sub a5, a5, t1 -; RV32I-NEXT: sub a4, t0, a4 -; RV32I-NEXT: sub a2, a1, a2 -; RV32I-NEXT: sub a1, a6, a3 -; RV32I-NEXT: bgez a4, .LBB32_4 +; RV32I-NEXT: sub a3, a3, a7 +; RV32I-NEXT: sltu a7, a3, t1 +; RV32I-NEXT: sub a1, a1, a7 +; RV32I-NEXT: sub a3, a3, t1 +; RV32I-NEXT: sub a2, a6, a2 +; RV32I-NEXT: sub a2, a2, t0 +; RV32I-NEXT: sub a4, a4, a5 +; RV32I-NEXT: bgez a1, .LBB32_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: snez a3, a2 -; RV32I-NEXT: snez a6, a1 -; RV32I-NEXT: neg a7, a5 -; RV32I-NEXT: snez a5, a5 -; RV32I-NEXT: or a3, a6, a3 -; RV32I-NEXT: add a4, a4, a5 +; RV32I-NEXT: snez a5, a2 +; RV32I-NEXT: snez a6, a4 +; RV32I-NEXT: snez a7, a3 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: neg a3, a3 +; RV32I-NEXT: add a1, a1, a7 +; RV32I-NEXT: sltu a7, a3, a5 +; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: sub a1, a1, a7 +; RV32I-NEXT: sub a3, a3, a5 ; RV32I-NEXT: add a2, a2, a6 -; RV32I-NEXT: sltu a6, a7, a3 -; RV32I-NEXT: neg a4, a4 -; RV32I-NEXT: sub a5, a7, a3 ; RV32I-NEXT: neg a2, a2 -; RV32I-NEXT: sub a4, a4, a6 -; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: neg a4, a4 ; RV32I-NEXT: .LBB32_4: 
-; RV32I-NEXT: sw a1, 0(a0) +; RV32I-NEXT: sw a4, 0(a0) ; RV32I-NEXT: sw a2, 4(a0) -; RV32I-NEXT: sw a5, 8(a0) -; RV32I-NEXT: sw a4, 12(a0) +; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: sw a1, 12(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_subnsw_i128_undef: @@ -2197,58 +2197,58 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { ; RV64I-NEXT: bgez a1, .LBB32_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: snez a2, a0 +; RV64I-NEXT: neg a0, a0 ; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: neg a1, a1 -; RV64I-NEXT: neg a0, a0 ; RV64I-NEXT: .LBB32_2: ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: abd_subnsw_i128_undef: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a4, 4(a2) -; RV32ZBB-NEXT: lw a5, 8(a2) -; RV32ZBB-NEXT: lw a2, 12(a2) -; RV32ZBB-NEXT: lw a7, 8(a1) +; RV32ZBB-NEXT: lw a4, 0(a1) +; RV32ZBB-NEXT: lw a3, 8(a1) ; RV32ZBB-NEXT: lw t0, 12(a1) -; RV32ZBB-NEXT: lw a6, 0(a1) -; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu t1, a7, a5 -; RV32ZBB-NEXT: sub t0, t0, a2 -; RV32ZBB-NEXT: sltu a2, a6, a3 -; RV32ZBB-NEXT: sub t0, t0, t1 -; RV32ZBB-NEXT: mv t1, a2 -; RV32ZBB-NEXT: beq a1, a4, .LBB32_2 +; RV32ZBB-NEXT: lw a7, 8(a2) +; RV32ZBB-NEXT: lw a5, 0(a2) +; RV32ZBB-NEXT: lw t1, 12(a2) +; RV32ZBB-NEXT: lw a2, 4(a2) +; RV32ZBB-NEXT: lw a6, 4(a1) +; RV32ZBB-NEXT: sltu a1, a3, a7 +; RV32ZBB-NEXT: sub t1, t0, t1 +; RV32ZBB-NEXT: sltu t0, a4, a5 +; RV32ZBB-NEXT: sub a1, t1, a1 +; RV32ZBB-NEXT: mv t1, t0 +; RV32ZBB-NEXT: beq a6, a2, .LBB32_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a1, a4 +; RV32ZBB-NEXT: sltu t1, a6, a2 ; RV32ZBB-NEXT: .LBB32_2: -; RV32ZBB-NEXT: sub a5, a7, a5 -; RV32ZBB-NEXT: sub a1, a1, a4 -; RV32ZBB-NEXT: sltu a4, a5, t1 -; RV32ZBB-NEXT: sub a5, a5, t1 -; RV32ZBB-NEXT: sub a4, t0, a4 -; RV32ZBB-NEXT: sub a2, a1, a2 -; RV32ZBB-NEXT: sub a1, a6, a3 -; RV32ZBB-NEXT: bgez a4, .LBB32_4 +; RV32ZBB-NEXT: sub a3, a3, a7 +; RV32ZBB-NEXT: sltu a7, a3, t1 +; RV32ZBB-NEXT: sub a1, a1, a7 +; RV32ZBB-NEXT: sub a3, a3, t1 +; RV32ZBB-NEXT: 
sub a2, a6, a2 +; RV32ZBB-NEXT: sub a2, a2, t0 +; RV32ZBB-NEXT: sub a4, a4, a5 +; RV32ZBB-NEXT: bgez a1, .LBB32_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: snez a3, a2 -; RV32ZBB-NEXT: snez a6, a1 -; RV32ZBB-NEXT: neg a7, a5 -; RV32ZBB-NEXT: snez a5, a5 -; RV32ZBB-NEXT: or a3, a6, a3 -; RV32ZBB-NEXT: add a4, a4, a5 +; RV32ZBB-NEXT: snez a5, a2 +; RV32ZBB-NEXT: snez a6, a4 +; RV32ZBB-NEXT: snez a7, a3 +; RV32ZBB-NEXT: or a5, a6, a5 +; RV32ZBB-NEXT: neg a3, a3 +; RV32ZBB-NEXT: add a1, a1, a7 +; RV32ZBB-NEXT: sltu a7, a3, a5 +; RV32ZBB-NEXT: neg a1, a1 +; RV32ZBB-NEXT: sub a1, a1, a7 +; RV32ZBB-NEXT: sub a3, a3, a5 ; RV32ZBB-NEXT: add a2, a2, a6 -; RV32ZBB-NEXT: sltu a6, a7, a3 -; RV32ZBB-NEXT: neg a4, a4 -; RV32ZBB-NEXT: sub a5, a7, a3 ; RV32ZBB-NEXT: neg a2, a2 -; RV32ZBB-NEXT: sub a4, a4, a6 -; RV32ZBB-NEXT: neg a1, a1 +; RV32ZBB-NEXT: neg a4, a4 ; RV32ZBB-NEXT: .LBB32_4: -; RV32ZBB-NEXT: sw a1, 0(a0) +; RV32ZBB-NEXT: sw a4, 0(a0) ; RV32ZBB-NEXT: sw a2, 4(a0) -; RV32ZBB-NEXT: sw a5, 8(a0) -; RV32ZBB-NEXT: sw a4, 12(a0) +; RV32ZBB-NEXT: sw a3, 8(a0) +; RV32ZBB-NEXT: sw a1, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_subnsw_i128_undef: @@ -2260,9 +2260,9 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { ; RV64ZBB-NEXT: bgez a1, .LBB32_2 ; RV64ZBB-NEXT: # %bb.1: ; RV64ZBB-NEXT: snez a2, a0 +; RV64ZBB-NEXT: neg a0, a0 ; RV64ZBB-NEXT: add a1, a1, a2 ; RV64ZBB-NEXT: neg a1, a1 -; RV64ZBB-NEXT: neg a0, a0 ; RV64ZBB-NEXT: .LBB32_2: ; RV64ZBB-NEXT: ret %sub = sub nsw i128 %a, %b @@ -2505,74 +2505,74 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind { define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_select_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw a4, 4(a1) -; RV32I-NEXT: lw a5, 8(a1) -; RV32I-NEXT: lw a7, 12(a1) -; RV32I-NEXT: lw a1, 0(a2) ; RV32I-NEXT: lw a6, 8(a2) +; RV32I-NEXT: lw a4, 0(a1) +; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t0, 12(a1) ; RV32I-NEXT: 
lw t1, 12(a2) -; RV32I-NEXT: lw a2, 4(a2) -; RV32I-NEXT: sltu t0, a6, a5 -; RV32I-NEXT: mv t4, t0 -; RV32I-NEXT: beq a7, t1, .LBB38_2 +; RV32I-NEXT: lw a5, 0(a2) +; RV32I-NEXT: lw a1, 4(a2) +; RV32I-NEXT: sltu a2, a6, a7 +; RV32I-NEXT: mv t4, a2 +; RV32I-NEXT: beq t0, t1, .LBB38_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt t4, t1, a7 +; RV32I-NEXT: slt t4, t1, t0 ; RV32I-NEXT: .LBB38_2: -; RV32I-NEXT: sltu t2, a1, a3 -; RV32I-NEXT: sltu t5, a2, a4 +; RV32I-NEXT: sltu t2, a5, a4 +; RV32I-NEXT: sltu t5, a1, a3 ; RV32I-NEXT: mv t3, t2 -; RV32I-NEXT: beq a4, a2, .LBB38_4 +; RV32I-NEXT: beq a3, a1, .LBB38_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: mv t3, t5 ; RV32I-NEXT: .LBB38_4: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: xor t6, a7, t1 -; RV32I-NEXT: xor s0, a5, a6 +; RV32I-NEXT: xor t6, t0, t1 +; RV32I-NEXT: xor s0, a7, a6 ; RV32I-NEXT: or t6, s0, t6 ; RV32I-NEXT: beqz t6, .LBB38_6 ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: mv t3, t4 ; RV32I-NEXT: .LBB38_6: ; RV32I-NEXT: mv t4, t2 -; RV32I-NEXT: beq a2, a4, .LBB38_8 +; RV32I-NEXT: beq a1, a3, .LBB38_8 ; RV32I-NEXT: # %bb.7: ; RV32I-NEXT: mv t4, t5 ; RV32I-NEXT: .LBB38_8: -; RV32I-NEXT: sltu t5, a3, a1 +; RV32I-NEXT: sltu t5, a4, a5 ; RV32I-NEXT: mv t6, t5 -; RV32I-NEXT: beq a4, a2, .LBB38_10 +; RV32I-NEXT: beq a3, a1, .LBB38_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: sltu t6, a4, a2 +; RV32I-NEXT: sltu t6, a3, a1 ; RV32I-NEXT: .LBB38_10: ; RV32I-NEXT: bnez t3, .LBB38_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sub a7, t1, a7 -; RV32I-NEXT: sub a5, a6, a5 +; RV32I-NEXT: sub t0, t1, t0 +; RV32I-NEXT: sub a6, a6, a7 +; RV32I-NEXT: sub a2, t0, a2 +; RV32I-NEXT: sltu a7, a6, t4 +; RV32I-NEXT: sub a4, a5, a4 +; RV32I-NEXT: sub a2, a2, a7 ; RV32I-NEXT: sub a1, a1, a3 -; RV32I-NEXT: sub a2, a2, a4 -; RV32I-NEXT: sub a4, a7, t0 -; RV32I-NEXT: sltu a6, a5, t4 -; RV32I-NEXT: sub a3, a2, t2 -; RV32I-NEXT: sub a2, a4, a6 -; RV32I-NEXT: sub a4, a5, t4 +; RV32I-NEXT: sub a1, a1, t2 +; 
RV32I-NEXT: sub a3, a6, t4 ; RV32I-NEXT: j .LBB38_13 ; RV32I-NEXT: .LBB38_12: -; RV32I-NEXT: sltu t0, a5, a6 -; RV32I-NEXT: sub a7, a7, t1 -; RV32I-NEXT: sub a5, a5, a6 -; RV32I-NEXT: sub a1, a3, a1 -; RV32I-NEXT: sub a4, a4, a2 -; RV32I-NEXT: sub a2, a7, t0 -; RV32I-NEXT: sltu a6, a5, t6 -; RV32I-NEXT: sub a3, a4, t5 -; RV32I-NEXT: sub a2, a2, a6 -; RV32I-NEXT: sub a4, a5, t6 +; RV32I-NEXT: sltu a2, a7, a6 +; RV32I-NEXT: sub t0, t0, t1 +; RV32I-NEXT: sub a6, a7, a6 +; RV32I-NEXT: sub a2, t0, a2 +; RV32I-NEXT: sltu a7, a6, t6 +; RV32I-NEXT: sub a2, a2, a7 +; RV32I-NEXT: sub a4, a4, a5 +; RV32I-NEXT: sub a3, a3, a1 +; RV32I-NEXT: sub a1, a3, t5 +; RV32I-NEXT: sub a3, a6, t6 ; RV32I-NEXT: .LBB38_13: -; RV32I-NEXT: sw a1, 0(a0) -; RV32I-NEXT: sw a3, 4(a0) -; RV32I-NEXT: sw a4, 8(a0) +; RV32I-NEXT: sw a4, 0(a0) +; RV32I-NEXT: sw a1, 4(a0) +; RV32I-NEXT: sw a3, 8(a0) ; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 @@ -2601,74 +2601,74 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_select_i128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 0(a1) -; RV32ZBB-NEXT: lw a4, 4(a1) -; RV32ZBB-NEXT: lw a5, 8(a1) -; RV32ZBB-NEXT: lw a7, 12(a1) -; RV32ZBB-NEXT: lw a1, 0(a2) ; RV32ZBB-NEXT: lw a6, 8(a2) +; RV32ZBB-NEXT: lw a4, 0(a1) +; RV32ZBB-NEXT: lw a3, 4(a1) +; RV32ZBB-NEXT: lw a7, 8(a1) +; RV32ZBB-NEXT: lw t0, 12(a1) ; RV32ZBB-NEXT: lw t1, 12(a2) -; RV32ZBB-NEXT: lw a2, 4(a2) -; RV32ZBB-NEXT: sltu t0, a6, a5 -; RV32ZBB-NEXT: mv t4, t0 -; RV32ZBB-NEXT: beq a7, t1, .LBB38_2 +; RV32ZBB-NEXT: lw a5, 0(a2) +; RV32ZBB-NEXT: lw a1, 4(a2) +; RV32ZBB-NEXT: sltu a2, a6, a7 +; RV32ZBB-NEXT: mv t4, a2 +; RV32ZBB-NEXT: beq t0, t1, .LBB38_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: slt t4, t1, a7 +; RV32ZBB-NEXT: slt t4, t1, t0 ; RV32ZBB-NEXT: .LBB38_2: -; RV32ZBB-NEXT: sltu t2, a1, a3 -; RV32ZBB-NEXT: sltu t5, a2, a4 +; RV32ZBB-NEXT: sltu t2, a5, a4 +; RV32ZBB-NEXT: sltu t5, a1, a3 ; 
RV32ZBB-NEXT: mv t3, t2 -; RV32ZBB-NEXT: beq a4, a2, .LBB38_4 +; RV32ZBB-NEXT: beq a3, a1, .LBB38_4 ; RV32ZBB-NEXT: # %bb.3: ; RV32ZBB-NEXT: mv t3, t5 ; RV32ZBB-NEXT: .LBB38_4: ; RV32ZBB-NEXT: addi sp, sp, -16 ; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32ZBB-NEXT: xor t6, a7, t1 -; RV32ZBB-NEXT: xor s0, a5, a6 +; RV32ZBB-NEXT: xor t6, t0, t1 +; RV32ZBB-NEXT: xor s0, a7, a6 ; RV32ZBB-NEXT: or t6, s0, t6 ; RV32ZBB-NEXT: beqz t6, .LBB38_6 ; RV32ZBB-NEXT: # %bb.5: ; RV32ZBB-NEXT: mv t3, t4 ; RV32ZBB-NEXT: .LBB38_6: ; RV32ZBB-NEXT: mv t4, t2 -; RV32ZBB-NEXT: beq a2, a4, .LBB38_8 +; RV32ZBB-NEXT: beq a1, a3, .LBB38_8 ; RV32ZBB-NEXT: # %bb.7: ; RV32ZBB-NEXT: mv t4, t5 ; RV32ZBB-NEXT: .LBB38_8: -; RV32ZBB-NEXT: sltu t5, a3, a1 +; RV32ZBB-NEXT: sltu t5, a4, a5 ; RV32ZBB-NEXT: mv t6, t5 -; RV32ZBB-NEXT: beq a4, a2, .LBB38_10 +; RV32ZBB-NEXT: beq a3, a1, .LBB38_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: sltu t6, a4, a2 +; RV32ZBB-NEXT: sltu t6, a3, a1 ; RV32ZBB-NEXT: .LBB38_10: ; RV32ZBB-NEXT: bnez t3, .LBB38_12 ; RV32ZBB-NEXT: # %bb.11: -; RV32ZBB-NEXT: sub a7, t1, a7 -; RV32ZBB-NEXT: sub a5, a6, a5 +; RV32ZBB-NEXT: sub t0, t1, t0 +; RV32ZBB-NEXT: sub a6, a6, a7 +; RV32ZBB-NEXT: sub a2, t0, a2 +; RV32ZBB-NEXT: sltu a7, a6, t4 +; RV32ZBB-NEXT: sub a4, a5, a4 +; RV32ZBB-NEXT: sub a2, a2, a7 ; RV32ZBB-NEXT: sub a1, a1, a3 -; RV32ZBB-NEXT: sub a2, a2, a4 -; RV32ZBB-NEXT: sub a4, a7, t0 -; RV32ZBB-NEXT: sltu a6, a5, t4 -; RV32ZBB-NEXT: sub a3, a2, t2 -; RV32ZBB-NEXT: sub a2, a4, a6 -; RV32ZBB-NEXT: sub a4, a5, t4 +; RV32ZBB-NEXT: sub a1, a1, t2 +; RV32ZBB-NEXT: sub a3, a6, t4 ; RV32ZBB-NEXT: j .LBB38_13 ; RV32ZBB-NEXT: .LBB38_12: -; RV32ZBB-NEXT: sltu t0, a5, a6 -; RV32ZBB-NEXT: sub a7, a7, t1 -; RV32ZBB-NEXT: sub a5, a5, a6 -; RV32ZBB-NEXT: sub a1, a3, a1 -; RV32ZBB-NEXT: sub a4, a4, a2 -; RV32ZBB-NEXT: sub a2, a7, t0 -; RV32ZBB-NEXT: sltu a6, a5, t6 -; RV32ZBB-NEXT: sub a3, a4, t5 -; RV32ZBB-NEXT: sub a2, a2, a6 -; RV32ZBB-NEXT: sub a4, a5, t6 +; RV32ZBB-NEXT: 
sltu a2, a7, a6 +; RV32ZBB-NEXT: sub t0, t0, t1 +; RV32ZBB-NEXT: sub a6, a7, a6 +; RV32ZBB-NEXT: sub a2, t0, a2 +; RV32ZBB-NEXT: sltu a7, a6, t6 +; RV32ZBB-NEXT: sub a2, a2, a7 +; RV32ZBB-NEXT: sub a4, a4, a5 +; RV32ZBB-NEXT: sub a3, a3, a1 +; RV32ZBB-NEXT: sub a1, a3, t5 +; RV32ZBB-NEXT: sub a3, a6, t6 ; RV32ZBB-NEXT: .LBB38_13: -; RV32ZBB-NEXT: sw a1, 0(a0) -; RV32ZBB-NEXT: sw a3, 4(a0) -; RV32ZBB-NEXT: sw a4, 8(a0) +; RV32ZBB-NEXT: sw a4, 0(a0) +; RV32ZBB-NEXT: sw a1, 4(a0) +; RV32ZBB-NEXT: sw a3, 8(a0) ; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZBB-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/abdu-neg.ll b/llvm/test/CodeGen/RISCV/abdu-neg.ll index 713b52f53e3d9..c2049a4d36f50 100644 --- a/llvm/test/CodeGen/RISCV/abdu-neg.ll +++ b/llvm/test/CodeGen/RISCV/abdu-neg.ll @@ -69,8 +69,8 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; ; RV64I-LABEL: abd_ext_i8_i16: ; RV64I: # %bb.0: -; RV64I-NEXT: zext.b a0, a0 ; RV64I-NEXT: slli a1, a1, 48 +; RV64I-NEXT: zext.b a0, a0 ; RV64I-NEXT: srli a1, a1, 48 ; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: srai a1, a0, 63 @@ -239,8 +239,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { ; ; RV64ZBB-LABEL: abd_ext_i16_i32: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: zext.h a0, a0 ; RV64ZBB-NEXT: slli a1, a1, 32 +; RV64ZBB-NEXT: zext.h a0, a0 ; RV64ZBB-NEXT: srli a1, a1, 32 ; RV64ZBB-NEXT: sub a0, a0, a1 ; RV64ZBB-NEXT: neg a1, a0 @@ -624,87 +624,87 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_ext_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a5, 0(a2) -; RV32I-NEXT: lw a7, 4(a2) -; RV32I-NEXT: lw a3, 8(a2) -; RV32I-NEXT: lw t1, 12(a2) +; RV32I-NEXT: lw a3, 0(a1) ; RV32I-NEXT: lw a4, 8(a1) -; RV32I-NEXT: lw a6, 12(a1) -; RV32I-NEXT: lw a2, 0(a1) +; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw t1, 8(a2) +; RV32I-NEXT: lw a7, 0(a2) +; RV32I-NEXT: lw t2, 12(a2) +; RV32I-NEXT: lw a6, 
4(a2) ; RV32I-NEXT: lw t0, 4(a1) -; RV32I-NEXT: sltu a1, a4, a3 -; RV32I-NEXT: sub t1, a6, t1 -; RV32I-NEXT: sltu t2, a2, a5 -; RV32I-NEXT: sub a1, t1, a1 -; RV32I-NEXT: mv t1, t2 -; RV32I-NEXT: beq t0, a7, .LBB11_2 +; RV32I-NEXT: sltu a1, a4, t1 +; RV32I-NEXT: sub a2, a5, t2 +; RV32I-NEXT: sltu t2, a3, a7 +; RV32I-NEXT: sub a1, a2, a1 +; RV32I-NEXT: mv a2, t2 +; RV32I-NEXT: beq t0, a6, .LBB11_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, t0, a7 +; RV32I-NEXT: sltu a2, t0, a6 ; RV32I-NEXT: .LBB11_2: -; RV32I-NEXT: sub a3, a4, a3 -; RV32I-NEXT: sltu t3, a3, t1 +; RV32I-NEXT: sub t1, a4, t1 +; RV32I-NEXT: sltu t3, t1, a2 ; RV32I-NEXT: sub a1, a1, t3 -; RV32I-NEXT: sub a3, a3, t1 -; RV32I-NEXT: beq a1, a6, .LBB11_4 +; RV32I-NEXT: sub a2, t1, a2 +; RV32I-NEXT: beq a1, a5, .LBB11_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t1, a6, a1 +; RV32I-NEXT: sltu t1, a5, a1 ; RV32I-NEXT: j .LBB11_5 ; RV32I-NEXT: .LBB11_4: -; RV32I-NEXT: sltu t1, a4, a3 +; RV32I-NEXT: sltu t1, a4, a2 ; RV32I-NEXT: .LBB11_5: -; RV32I-NEXT: sub a7, t0, a7 -; RV32I-NEXT: sub a7, a7, t2 -; RV32I-NEXT: sub a5, a2, a5 -; RV32I-NEXT: beq a7, t0, .LBB11_7 +; RV32I-NEXT: sub a6, t0, a6 +; RV32I-NEXT: sub a6, a6, t2 +; RV32I-NEXT: sub a7, a3, a7 +; RV32I-NEXT: beq a6, t0, .LBB11_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: sltu a2, t0, a7 +; RV32I-NEXT: sltu a3, t0, a6 ; RV32I-NEXT: j .LBB11_8 ; RV32I-NEXT: .LBB11_7: -; RV32I-NEXT: sltu a2, a2, a5 +; RV32I-NEXT: sltu a3, a3, a7 ; RV32I-NEXT: .LBB11_8: -; RV32I-NEXT: xor a6, a1, a6 -; RV32I-NEXT: xor a4, a3, a4 -; RV32I-NEXT: or a4, a4, a6 +; RV32I-NEXT: xor a5, a1, a5 +; RV32I-NEXT: xor a4, a2, a4 +; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: beqz a4, .LBB11_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv a2, t1 +; RV32I-NEXT: mv a3, t1 ; RV32I-NEXT: .LBB11_10: -; RV32I-NEXT: neg a4, a2 -; RV32I-NEXT: xor t0, a5, a4 -; RV32I-NEXT: xor t3, a7, a4 -; RV32I-NEXT: sltu a5, t0, a4 -; RV32I-NEXT: add a6, t3, a2 -; RV32I-NEXT: add t0, t0, a2 -; RV32I-NEXT: sub t1, a6, a5 
-; RV32I-NEXT: snez a6, t1 -; RV32I-NEXT: snez t2, t0 -; RV32I-NEXT: or a6, t2, a6 -; RV32I-NEXT: beqz a7, .LBB11_12 +; RV32I-NEXT: neg t1, a3 +; RV32I-NEXT: xor a5, a7, t1 +; RV32I-NEXT: xor t3, a6, t1 +; RV32I-NEXT: sltu t2, a5, t1 +; RV32I-NEXT: add a4, t3, a3 +; RV32I-NEXT: sub a4, a4, t2 +; RV32I-NEXT: add a5, a5, a3 +; RV32I-NEXT: snez t0, a4 +; RV32I-NEXT: snez a7, a5 +; RV32I-NEXT: or t0, a7, t0 +; RV32I-NEXT: beqz a6, .LBB11_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sltu a5, t3, a4 +; RV32I-NEXT: sltu t2, t3, t1 ; RV32I-NEXT: .LBB11_12: -; RV32I-NEXT: xor a3, a3, a4 -; RV32I-NEXT: xor a1, a1, a4 -; RV32I-NEXT: add t1, t1, t2 -; RV32I-NEXT: neg a7, t0 -; RV32I-NEXT: add t0, a3, a2 -; RV32I-NEXT: sltu a3, a3, a4 -; RV32I-NEXT: add a1, a1, a2 -; RV32I-NEXT: neg a2, t1 -; RV32I-NEXT: sub a4, t0, a5 -; RV32I-NEXT: sub a1, a1, a3 -; RV32I-NEXT: sltu a3, t0, a5 -; RV32I-NEXT: neg a5, a4 -; RV32I-NEXT: sub a1, a1, a3 -; RV32I-NEXT: snez a3, a4 -; RV32I-NEXT: sltu a4, a5, a6 +; RV32I-NEXT: xor a2, a2, t1 +; RV32I-NEXT: xor a1, a1, t1 +; RV32I-NEXT: add a6, a2, a3 +; RV32I-NEXT: sltu a2, a2, t1 +; RV32I-NEXT: add a1, a1, a3 +; RV32I-NEXT: sub a3, a6, t2 +; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: neg a2, a3 +; RV32I-NEXT: sltu a6, a6, t2 +; RV32I-NEXT: sub a1, a1, a6 +; RV32I-NEXT: snez a3, a3 +; RV32I-NEXT: sltu a6, a2, t0 ; RV32I-NEXT: add a1, a1, a3 -; RV32I-NEXT: sub a3, a5, a6 +; RV32I-NEXT: add a1, a1, a6 ; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a1, a1, a4 -; RV32I-NEXT: sw a7, 0(a0) -; RV32I-NEXT: sw a2, 4(a0) -; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: sub a2, a2, t0 +; RV32I-NEXT: add a4, a4, a7 +; RV32I-NEXT: neg a3, a4 +; RV32I-NEXT: neg a4, a5 +; RV32I-NEXT: sw a4, 0(a0) +; RV32I-NEXT: sw a3, 4(a0) +; RV32I-NEXT: sw a2, 8(a0) ; RV32I-NEXT: sw a1, 12(a0) ; RV32I-NEXT: ret ; @@ -736,87 +736,87 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_ext_i128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a5, 0(a2) -; RV32ZBB-NEXT: lw 
a7, 4(a2) -; RV32ZBB-NEXT: lw a3, 8(a2) -; RV32ZBB-NEXT: lw t1, 12(a2) +; RV32ZBB-NEXT: lw a3, 0(a1) ; RV32ZBB-NEXT: lw a4, 8(a1) -; RV32ZBB-NEXT: lw a6, 12(a1) -; RV32ZBB-NEXT: lw a2, 0(a1) +; RV32ZBB-NEXT: lw a5, 12(a1) +; RV32ZBB-NEXT: lw t1, 8(a2) +; RV32ZBB-NEXT: lw a7, 0(a2) +; RV32ZBB-NEXT: lw t2, 12(a2) +; RV32ZBB-NEXT: lw a6, 4(a2) ; RV32ZBB-NEXT: lw t0, 4(a1) -; RV32ZBB-NEXT: sltu a1, a4, a3 -; RV32ZBB-NEXT: sub t1, a6, t1 -; RV32ZBB-NEXT: sltu t2, a2, a5 -; RV32ZBB-NEXT: sub a1, t1, a1 -; RV32ZBB-NEXT: mv t1, t2 -; RV32ZBB-NEXT: beq t0, a7, .LBB11_2 +; RV32ZBB-NEXT: sltu a1, a4, t1 +; RV32ZBB-NEXT: sub a2, a5, t2 +; RV32ZBB-NEXT: sltu t2, a3, a7 +; RV32ZBB-NEXT: sub a1, a2, a1 +; RV32ZBB-NEXT: mv a2, t2 +; RV32ZBB-NEXT: beq t0, a6, .LBB11_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, t0, a7 +; RV32ZBB-NEXT: sltu a2, t0, a6 ; RV32ZBB-NEXT: .LBB11_2: -; RV32ZBB-NEXT: sub a3, a4, a3 -; RV32ZBB-NEXT: sltu t3, a3, t1 +; RV32ZBB-NEXT: sub t1, a4, t1 +; RV32ZBB-NEXT: sltu t3, t1, a2 ; RV32ZBB-NEXT: sub a1, a1, t3 -; RV32ZBB-NEXT: sub a3, a3, t1 -; RV32ZBB-NEXT: beq a1, a6, .LBB11_4 +; RV32ZBB-NEXT: sub a2, t1, a2 +; RV32ZBB-NEXT: beq a1, a5, .LBB11_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t1, a6, a1 +; RV32ZBB-NEXT: sltu t1, a5, a1 ; RV32ZBB-NEXT: j .LBB11_5 ; RV32ZBB-NEXT: .LBB11_4: -; RV32ZBB-NEXT: sltu t1, a4, a3 +; RV32ZBB-NEXT: sltu t1, a4, a2 ; RV32ZBB-NEXT: .LBB11_5: -; RV32ZBB-NEXT: sub a7, t0, a7 -; RV32ZBB-NEXT: sub a7, a7, t2 -; RV32ZBB-NEXT: sub a5, a2, a5 -; RV32ZBB-NEXT: beq a7, t0, .LBB11_7 +; RV32ZBB-NEXT: sub a6, t0, a6 +; RV32ZBB-NEXT: sub a6, a6, t2 +; RV32ZBB-NEXT: sub a7, a3, a7 +; RV32ZBB-NEXT: beq a6, t0, .LBB11_7 ; RV32ZBB-NEXT: # %bb.6: -; RV32ZBB-NEXT: sltu a2, t0, a7 +; RV32ZBB-NEXT: sltu a3, t0, a6 ; RV32ZBB-NEXT: j .LBB11_8 ; RV32ZBB-NEXT: .LBB11_7: -; RV32ZBB-NEXT: sltu a2, a2, a5 +; RV32ZBB-NEXT: sltu a3, a3, a7 ; RV32ZBB-NEXT: .LBB11_8: -; RV32ZBB-NEXT: xor a6, a1, a6 -; RV32ZBB-NEXT: xor a4, a3, a4 -; 
RV32ZBB-NEXT: or a4, a4, a6 +; RV32ZBB-NEXT: xor a5, a1, a5 +; RV32ZBB-NEXT: xor a4, a2, a4 +; RV32ZBB-NEXT: or a4, a4, a5 ; RV32ZBB-NEXT: beqz a4, .LBB11_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: mv a2, t1 +; RV32ZBB-NEXT: mv a3, t1 ; RV32ZBB-NEXT: .LBB11_10: -; RV32ZBB-NEXT: neg a4, a2 -; RV32ZBB-NEXT: xor t0, a5, a4 -; RV32ZBB-NEXT: xor t3, a7, a4 -; RV32ZBB-NEXT: sltu a5, t0, a4 -; RV32ZBB-NEXT: add a6, t3, a2 -; RV32ZBB-NEXT: add t0, t0, a2 -; RV32ZBB-NEXT: sub t1, a6, a5 -; RV32ZBB-NEXT: snez a6, t1 -; RV32ZBB-NEXT: snez t2, t0 -; RV32ZBB-NEXT: or a6, t2, a6 -; RV32ZBB-NEXT: beqz a7, .LBB11_12 +; RV32ZBB-NEXT: neg t1, a3 +; RV32ZBB-NEXT: xor a5, a7, t1 +; RV32ZBB-NEXT: xor t3, a6, t1 +; RV32ZBB-NEXT: sltu t2, a5, t1 +; RV32ZBB-NEXT: add a4, t3, a3 +; RV32ZBB-NEXT: sub a4, a4, t2 +; RV32ZBB-NEXT: add a5, a5, a3 +; RV32ZBB-NEXT: snez t0, a4 +; RV32ZBB-NEXT: snez a7, a5 +; RV32ZBB-NEXT: or t0, a7, t0 +; RV32ZBB-NEXT: beqz a6, .LBB11_12 ; RV32ZBB-NEXT: # %bb.11: -; RV32ZBB-NEXT: sltu a5, t3, a4 +; RV32ZBB-NEXT: sltu t2, t3, t1 ; RV32ZBB-NEXT: .LBB11_12: -; RV32ZBB-NEXT: xor a3, a3, a4 -; RV32ZBB-NEXT: xor a1, a1, a4 -; RV32ZBB-NEXT: add t1, t1, t2 -; RV32ZBB-NEXT: neg a7, t0 -; RV32ZBB-NEXT: add t0, a3, a2 -; RV32ZBB-NEXT: sltu a3, a3, a4 -; RV32ZBB-NEXT: add a1, a1, a2 -; RV32ZBB-NEXT: neg a2, t1 -; RV32ZBB-NEXT: sub a4, t0, a5 -; RV32ZBB-NEXT: sub a1, a1, a3 -; RV32ZBB-NEXT: sltu a3, t0, a5 -; RV32ZBB-NEXT: neg a5, a4 -; RV32ZBB-NEXT: sub a1, a1, a3 -; RV32ZBB-NEXT: snez a3, a4 -; RV32ZBB-NEXT: sltu a4, a5, a6 +; RV32ZBB-NEXT: xor a2, a2, t1 +; RV32ZBB-NEXT: xor a1, a1, t1 +; RV32ZBB-NEXT: add a6, a2, a3 +; RV32ZBB-NEXT: sltu a2, a2, t1 ; RV32ZBB-NEXT: add a1, a1, a3 -; RV32ZBB-NEXT: sub a3, a5, a6 +; RV32ZBB-NEXT: sub a3, a6, t2 +; RV32ZBB-NEXT: sub a1, a1, a2 +; RV32ZBB-NEXT: neg a2, a3 +; RV32ZBB-NEXT: sltu a6, a6, t2 +; RV32ZBB-NEXT: sub a1, a1, a6 +; RV32ZBB-NEXT: snez a3, a3 +; RV32ZBB-NEXT: sltu a6, a2, t0 +; RV32ZBB-NEXT: add a1, a1, a3 +; 
RV32ZBB-NEXT: add a1, a1, a6 ; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: sub a1, a1, a4 -; RV32ZBB-NEXT: sw a7, 0(a0) -; RV32ZBB-NEXT: sw a2, 4(a0) -; RV32ZBB-NEXT: sw a3, 8(a0) +; RV32ZBB-NEXT: sub a2, a2, t0 +; RV32ZBB-NEXT: add a4, a4, a7 +; RV32ZBB-NEXT: neg a3, a4 +; RV32ZBB-NEXT: neg a4, a5 +; RV32ZBB-NEXT: sw a4, 0(a0) +; RV32ZBB-NEXT: sw a3, 4(a0) +; RV32ZBB-NEXT: sw a2, 8(a0) ; RV32ZBB-NEXT: sw a1, 12(a0) ; RV32ZBB-NEXT: ret ; @@ -857,87 +857,87 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_ext_i128_undef: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a5, 0(a2) -; RV32I-NEXT: lw a7, 4(a2) -; RV32I-NEXT: lw a3, 8(a2) -; RV32I-NEXT: lw t1, 12(a2) +; RV32I-NEXT: lw a3, 0(a1) ; RV32I-NEXT: lw a4, 8(a1) -; RV32I-NEXT: lw a6, 12(a1) -; RV32I-NEXT: lw a2, 0(a1) +; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw t1, 8(a2) +; RV32I-NEXT: lw a7, 0(a2) +; RV32I-NEXT: lw t2, 12(a2) +; RV32I-NEXT: lw a6, 4(a2) ; RV32I-NEXT: lw t0, 4(a1) -; RV32I-NEXT: sltu a1, a4, a3 -; RV32I-NEXT: sub t1, a6, t1 -; RV32I-NEXT: sltu t2, a2, a5 -; RV32I-NEXT: sub a1, t1, a1 -; RV32I-NEXT: mv t1, t2 -; RV32I-NEXT: beq t0, a7, .LBB12_2 +; RV32I-NEXT: sltu a1, a4, t1 +; RV32I-NEXT: sub a2, a5, t2 +; RV32I-NEXT: sltu t2, a3, a7 +; RV32I-NEXT: sub a1, a2, a1 +; RV32I-NEXT: mv a2, t2 +; RV32I-NEXT: beq t0, a6, .LBB12_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, t0, a7 +; RV32I-NEXT: sltu a2, t0, a6 ; RV32I-NEXT: .LBB12_2: -; RV32I-NEXT: sub a3, a4, a3 -; RV32I-NEXT: sltu t3, a3, t1 +; RV32I-NEXT: sub t1, a4, t1 +; RV32I-NEXT: sltu t3, t1, a2 ; RV32I-NEXT: sub a1, a1, t3 -; RV32I-NEXT: sub a3, a3, t1 -; RV32I-NEXT: beq a1, a6, .LBB12_4 +; RV32I-NEXT: sub a2, t1, a2 +; RV32I-NEXT: beq a1, a5, .LBB12_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t1, a6, a1 +; RV32I-NEXT: sltu t1, a5, a1 ; RV32I-NEXT: j .LBB12_5 ; RV32I-NEXT: .LBB12_4: -; RV32I-NEXT: sltu t1, a4, a3 +; RV32I-NEXT: sltu t1, a4, a2 ; RV32I-NEXT: 
.LBB12_5: -; RV32I-NEXT: sub a7, t0, a7 -; RV32I-NEXT: sub a7, a7, t2 -; RV32I-NEXT: sub a5, a2, a5 -; RV32I-NEXT: beq a7, t0, .LBB12_7 +; RV32I-NEXT: sub a6, t0, a6 +; RV32I-NEXT: sub a6, a6, t2 +; RV32I-NEXT: sub a7, a3, a7 +; RV32I-NEXT: beq a6, t0, .LBB12_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: sltu a2, t0, a7 +; RV32I-NEXT: sltu a3, t0, a6 ; RV32I-NEXT: j .LBB12_8 ; RV32I-NEXT: .LBB12_7: -; RV32I-NEXT: sltu a2, a2, a5 +; RV32I-NEXT: sltu a3, a3, a7 ; RV32I-NEXT: .LBB12_8: -; RV32I-NEXT: xor a6, a1, a6 -; RV32I-NEXT: xor a4, a3, a4 -; RV32I-NEXT: or a4, a4, a6 +; RV32I-NEXT: xor a5, a1, a5 +; RV32I-NEXT: xor a4, a2, a4 +; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: beqz a4, .LBB12_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv a2, t1 +; RV32I-NEXT: mv a3, t1 ; RV32I-NEXT: .LBB12_10: -; RV32I-NEXT: neg a4, a2 -; RV32I-NEXT: xor t0, a5, a4 -; RV32I-NEXT: xor t3, a7, a4 -; RV32I-NEXT: sltu a5, t0, a4 -; RV32I-NEXT: add a6, t3, a2 -; RV32I-NEXT: add t0, t0, a2 -; RV32I-NEXT: sub t1, a6, a5 -; RV32I-NEXT: snez a6, t1 -; RV32I-NEXT: snez t2, t0 -; RV32I-NEXT: or a6, t2, a6 -; RV32I-NEXT: beqz a7, .LBB12_12 +; RV32I-NEXT: neg t1, a3 +; RV32I-NEXT: xor a5, a7, t1 +; RV32I-NEXT: xor t3, a6, t1 +; RV32I-NEXT: sltu t2, a5, t1 +; RV32I-NEXT: add a4, t3, a3 +; RV32I-NEXT: sub a4, a4, t2 +; RV32I-NEXT: add a5, a5, a3 +; RV32I-NEXT: snez t0, a4 +; RV32I-NEXT: snez a7, a5 +; RV32I-NEXT: or t0, a7, t0 +; RV32I-NEXT: beqz a6, .LBB12_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sltu a5, t3, a4 +; RV32I-NEXT: sltu t2, t3, t1 ; RV32I-NEXT: .LBB12_12: -; RV32I-NEXT: xor a3, a3, a4 -; RV32I-NEXT: xor a1, a1, a4 -; RV32I-NEXT: add t1, t1, t2 -; RV32I-NEXT: neg a7, t0 -; RV32I-NEXT: add t0, a3, a2 -; RV32I-NEXT: sltu a3, a3, a4 -; RV32I-NEXT: add a1, a1, a2 -; RV32I-NEXT: neg a2, t1 -; RV32I-NEXT: sub a4, t0, a5 -; RV32I-NEXT: sub a1, a1, a3 -; RV32I-NEXT: sltu a3, t0, a5 -; RV32I-NEXT: neg a5, a4 -; RV32I-NEXT: sub a1, a1, a3 -; RV32I-NEXT: snez a3, a4 -; RV32I-NEXT: sltu a4, a5, a6 +; 
RV32I-NEXT: xor a2, a2, t1 +; RV32I-NEXT: xor a1, a1, t1 +; RV32I-NEXT: add a6, a2, a3 +; RV32I-NEXT: sltu a2, a2, t1 +; RV32I-NEXT: add a1, a1, a3 +; RV32I-NEXT: sub a3, a6, t2 +; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: neg a2, a3 +; RV32I-NEXT: sltu a6, a6, t2 +; RV32I-NEXT: sub a1, a1, a6 +; RV32I-NEXT: snez a3, a3 +; RV32I-NEXT: sltu a6, a2, t0 ; RV32I-NEXT: add a1, a1, a3 -; RV32I-NEXT: sub a3, a5, a6 +; RV32I-NEXT: add a1, a1, a6 ; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a1, a1, a4 -; RV32I-NEXT: sw a7, 0(a0) -; RV32I-NEXT: sw a2, 4(a0) -; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: sub a2, a2, t0 +; RV32I-NEXT: add a4, a4, a7 +; RV32I-NEXT: neg a3, a4 +; RV32I-NEXT: neg a4, a5 +; RV32I-NEXT: sw a4, 0(a0) +; RV32I-NEXT: sw a3, 4(a0) +; RV32I-NEXT: sw a2, 8(a0) ; RV32I-NEXT: sw a1, 12(a0) ; RV32I-NEXT: ret ; @@ -969,87 +969,87 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_ext_i128_undef: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a5, 0(a2) -; RV32ZBB-NEXT: lw a7, 4(a2) -; RV32ZBB-NEXT: lw a3, 8(a2) -; RV32ZBB-NEXT: lw t1, 12(a2) +; RV32ZBB-NEXT: lw a3, 0(a1) ; RV32ZBB-NEXT: lw a4, 8(a1) -; RV32ZBB-NEXT: lw a6, 12(a1) -; RV32ZBB-NEXT: lw a2, 0(a1) +; RV32ZBB-NEXT: lw a5, 12(a1) +; RV32ZBB-NEXT: lw t1, 8(a2) +; RV32ZBB-NEXT: lw a7, 0(a2) +; RV32ZBB-NEXT: lw t2, 12(a2) +; RV32ZBB-NEXT: lw a6, 4(a2) ; RV32ZBB-NEXT: lw t0, 4(a1) -; RV32ZBB-NEXT: sltu a1, a4, a3 -; RV32ZBB-NEXT: sub t1, a6, t1 -; RV32ZBB-NEXT: sltu t2, a2, a5 -; RV32ZBB-NEXT: sub a1, t1, a1 -; RV32ZBB-NEXT: mv t1, t2 -; RV32ZBB-NEXT: beq t0, a7, .LBB12_2 +; RV32ZBB-NEXT: sltu a1, a4, t1 +; RV32ZBB-NEXT: sub a2, a5, t2 +; RV32ZBB-NEXT: sltu t2, a3, a7 +; RV32ZBB-NEXT: sub a1, a2, a1 +; RV32ZBB-NEXT: mv a2, t2 +; RV32ZBB-NEXT: beq t0, a6, .LBB12_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, t0, a7 +; RV32ZBB-NEXT: sltu a2, t0, a6 ; RV32ZBB-NEXT: .LBB12_2: -; RV32ZBB-NEXT: sub a3, a4, a3 -; RV32ZBB-NEXT: sltu t3, a3, t1 +; RV32ZBB-NEXT: sub t1, a4, t1 +; 
RV32ZBB-NEXT: sltu t3, t1, a2 ; RV32ZBB-NEXT: sub a1, a1, t3 -; RV32ZBB-NEXT: sub a3, a3, t1 -; RV32ZBB-NEXT: beq a1, a6, .LBB12_4 +; RV32ZBB-NEXT: sub a2, t1, a2 +; RV32ZBB-NEXT: beq a1, a5, .LBB12_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t1, a6, a1 +; RV32ZBB-NEXT: sltu t1, a5, a1 ; RV32ZBB-NEXT: j .LBB12_5 ; RV32ZBB-NEXT: .LBB12_4: -; RV32ZBB-NEXT: sltu t1, a4, a3 +; RV32ZBB-NEXT: sltu t1, a4, a2 ; RV32ZBB-NEXT: .LBB12_5: -; RV32ZBB-NEXT: sub a7, t0, a7 -; RV32ZBB-NEXT: sub a7, a7, t2 -; RV32ZBB-NEXT: sub a5, a2, a5 -; RV32ZBB-NEXT: beq a7, t0, .LBB12_7 +; RV32ZBB-NEXT: sub a6, t0, a6 +; RV32ZBB-NEXT: sub a6, a6, t2 +; RV32ZBB-NEXT: sub a7, a3, a7 +; RV32ZBB-NEXT: beq a6, t0, .LBB12_7 ; RV32ZBB-NEXT: # %bb.6: -; RV32ZBB-NEXT: sltu a2, t0, a7 +; RV32ZBB-NEXT: sltu a3, t0, a6 ; RV32ZBB-NEXT: j .LBB12_8 ; RV32ZBB-NEXT: .LBB12_7: -; RV32ZBB-NEXT: sltu a2, a2, a5 +; RV32ZBB-NEXT: sltu a3, a3, a7 ; RV32ZBB-NEXT: .LBB12_8: -; RV32ZBB-NEXT: xor a6, a1, a6 -; RV32ZBB-NEXT: xor a4, a3, a4 -; RV32ZBB-NEXT: or a4, a4, a6 +; RV32ZBB-NEXT: xor a5, a1, a5 +; RV32ZBB-NEXT: xor a4, a2, a4 +; RV32ZBB-NEXT: or a4, a4, a5 ; RV32ZBB-NEXT: beqz a4, .LBB12_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: mv a2, t1 +; RV32ZBB-NEXT: mv a3, t1 ; RV32ZBB-NEXT: .LBB12_10: -; RV32ZBB-NEXT: neg a4, a2 -; RV32ZBB-NEXT: xor t0, a5, a4 -; RV32ZBB-NEXT: xor t3, a7, a4 -; RV32ZBB-NEXT: sltu a5, t0, a4 -; RV32ZBB-NEXT: add a6, t3, a2 -; RV32ZBB-NEXT: add t0, t0, a2 -; RV32ZBB-NEXT: sub t1, a6, a5 -; RV32ZBB-NEXT: snez a6, t1 -; RV32ZBB-NEXT: snez t2, t0 -; RV32ZBB-NEXT: or a6, t2, a6 -; RV32ZBB-NEXT: beqz a7, .LBB12_12 +; RV32ZBB-NEXT: neg t1, a3 +; RV32ZBB-NEXT: xor a5, a7, t1 +; RV32ZBB-NEXT: xor t3, a6, t1 +; RV32ZBB-NEXT: sltu t2, a5, t1 +; RV32ZBB-NEXT: add a4, t3, a3 +; RV32ZBB-NEXT: sub a4, a4, t2 +; RV32ZBB-NEXT: add a5, a5, a3 +; RV32ZBB-NEXT: snez t0, a4 +; RV32ZBB-NEXT: snez a7, a5 +; RV32ZBB-NEXT: or t0, a7, t0 +; RV32ZBB-NEXT: beqz a6, .LBB12_12 ; RV32ZBB-NEXT: # %bb.11: -; 
RV32ZBB-NEXT: sltu a5, t3, a4 +; RV32ZBB-NEXT: sltu t2, t3, t1 ; RV32ZBB-NEXT: .LBB12_12: -; RV32ZBB-NEXT: xor a3, a3, a4 -; RV32ZBB-NEXT: xor a1, a1, a4 -; RV32ZBB-NEXT: add t1, t1, t2 -; RV32ZBB-NEXT: neg a7, t0 -; RV32ZBB-NEXT: add t0, a3, a2 -; RV32ZBB-NEXT: sltu a3, a3, a4 -; RV32ZBB-NEXT: add a1, a1, a2 -; RV32ZBB-NEXT: neg a2, t1 -; RV32ZBB-NEXT: sub a4, t0, a5 -; RV32ZBB-NEXT: sub a1, a1, a3 -; RV32ZBB-NEXT: sltu a3, t0, a5 -; RV32ZBB-NEXT: neg a5, a4 -; RV32ZBB-NEXT: sub a1, a1, a3 -; RV32ZBB-NEXT: snez a3, a4 -; RV32ZBB-NEXT: sltu a4, a5, a6 +; RV32ZBB-NEXT: xor a2, a2, t1 +; RV32ZBB-NEXT: xor a1, a1, t1 +; RV32ZBB-NEXT: add a6, a2, a3 +; RV32ZBB-NEXT: sltu a2, a2, t1 +; RV32ZBB-NEXT: add a1, a1, a3 +; RV32ZBB-NEXT: sub a3, a6, t2 +; RV32ZBB-NEXT: sub a1, a1, a2 +; RV32ZBB-NEXT: neg a2, a3 +; RV32ZBB-NEXT: sltu a6, a6, t2 +; RV32ZBB-NEXT: sub a1, a1, a6 +; RV32ZBB-NEXT: snez a3, a3 +; RV32ZBB-NEXT: sltu a6, a2, t0 ; RV32ZBB-NEXT: add a1, a1, a3 -; RV32ZBB-NEXT: sub a3, a5, a6 +; RV32ZBB-NEXT: add a1, a1, a6 ; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: sub a1, a1, a4 -; RV32ZBB-NEXT: sw a7, 0(a0) -; RV32ZBB-NEXT: sw a2, 4(a0) -; RV32ZBB-NEXT: sw a3, 8(a0) +; RV32ZBB-NEXT: sub a2, a2, t0 +; RV32ZBB-NEXT: add a4, a4, a7 +; RV32ZBB-NEXT: neg a3, a4 +; RV32ZBB-NEXT: neg a4, a5 +; RV32ZBB-NEXT: sw a4, 0(a0) +; RV32ZBB-NEXT: sw a3, 4(a0) +; RV32ZBB-NEXT: sw a2, 8(a0) ; RV32ZBB-NEXT: sw a1, 12(a0) ; RV32ZBB-NEXT: ret ; @@ -1318,8 +1318,8 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: lw a6, 4(a2) ; RV32I-NEXT: lw a7, 8(a2) ; RV32I-NEXT: lw t0, 12(a2) -; RV32I-NEXT: lw a3, 4(a1) ; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a3, 4(a1) ; RV32I-NEXT: lw a4, 8(a1) ; RV32I-NEXT: beq a5, t0, .LBB17_2 ; RV32I-NEXT: # %bb.1: @@ -1393,11 +1393,11 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: .LBB17_21: ; RV32I-NEXT: sub a4, t3, a4 ; RV32I-NEXT: sub a3, t1, a3 -; RV32I-NEXT: sub a2, a2, a1 -; RV32I-NEXT: sltu a1, a4, 
a7 +; RV32I-NEXT: sltu t0, a4, a7 ; RV32I-NEXT: sub a4, a4, a7 ; RV32I-NEXT: sub a3, a3, a6 -; RV32I-NEXT: sub a5, a5, a1 +; RV32I-NEXT: sub a5, a5, t0 +; RV32I-NEXT: sub a2, a2, a1 ; RV32I-NEXT: sw a2, 0(a0) ; RV32I-NEXT: sw a3, 4(a0) ; RV32I-NEXT: sw a4, 8(a0) @@ -1445,8 +1445,8 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: lw a6, 4(a2) ; RV32ZBB-NEXT: lw a7, 8(a2) ; RV32ZBB-NEXT: lw t0, 12(a2) -; RV32ZBB-NEXT: lw a3, 4(a1) ; RV32ZBB-NEXT: lw a5, 12(a1) +; RV32ZBB-NEXT: lw a3, 4(a1) ; RV32ZBB-NEXT: lw a4, 8(a1) ; RV32ZBB-NEXT: beq a5, t0, .LBB17_2 ; RV32ZBB-NEXT: # %bb.1: @@ -1520,11 +1520,11 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: .LBB17_21: ; RV32ZBB-NEXT: sub a4, t3, a4 ; RV32ZBB-NEXT: sub a3, t1, a3 -; RV32ZBB-NEXT: sub a2, a2, a1 -; RV32ZBB-NEXT: sltu a1, a4, a7 +; RV32ZBB-NEXT: sltu t0, a4, a7 ; RV32ZBB-NEXT: sub a4, a4, a7 ; RV32ZBB-NEXT: sub a3, a3, a6 -; RV32ZBB-NEXT: sub a5, a5, a1 +; RV32ZBB-NEXT: sub a5, a5, t0 +; RV32ZBB-NEXT: sub a2, a2, a1 ; RV32ZBB-NEXT: sw a2, 0(a0) ; RV32ZBB-NEXT: sw a3, 4(a0) ; RV32ZBB-NEXT: sw a4, 8(a0) @@ -1763,19 +1763,19 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_cmp_i128: ; RV32I: # %bb.0: +; RV32I-NEXT: lw a5, 8(a1) ; RV32I-NEXT: lw a3, 0(a2) ; RV32I-NEXT: lw a4, 4(a2) -; RV32I-NEXT: lw a5, 8(a2) +; RV32I-NEXT: lw a6, 8(a2) ; RV32I-NEXT: lw a7, 12(a2) +; RV32I-NEXT: lw t0, 12(a1) ; RV32I-NEXT: lw a2, 0(a1) -; RV32I-NEXT: lw a6, 8(a1) -; RV32I-NEXT: lw t1, 12(a1) ; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu t0, a6, a5 -; RV32I-NEXT: mv t4, t0 -; RV32I-NEXT: beq t1, a7, .LBB22_2 +; RV32I-NEXT: sltu t1, a5, a6 +; RV32I-NEXT: mv t4, t1 +; RV32I-NEXT: beq t0, a7, .LBB22_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t4, t1, a7 +; RV32I-NEXT: sltu t4, t0, a7 ; RV32I-NEXT: .LBB22_2: ; RV32I-NEXT: sltu t2, a2, a3 ; RV32I-NEXT: mv t3, t2 @@ -1783,8 +1783,8 @@ define i128 
@abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: sltu t3, a1, a4 ; RV32I-NEXT: .LBB22_4: -; RV32I-NEXT: xor t5, t1, a7 -; RV32I-NEXT: xor t6, a6, a5 +; RV32I-NEXT: xor t5, t0, a7 +; RV32I-NEXT: xor t6, a5, a6 ; RV32I-NEXT: or t5, t6, t5 ; RV32I-NEXT: mv t6, t3 ; RV32I-NEXT: beqz t5, .LBB22_6 @@ -1799,32 +1799,32 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: .LBB22_8: ; RV32I-NEXT: bnez t6, .LBB22_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: sltu t0, a5, a6 -; RV32I-NEXT: sub a7, a7, t1 -; RV32I-NEXT: sub a5, a5, a6 +; RV32I-NEXT: sltu t1, a6, a5 +; RV32I-NEXT: sub a7, a7, t0 +; RV32I-NEXT: sub a6, a6, a5 +; RV32I-NEXT: sub a5, a7, t1 +; RV32I-NEXT: sltu a7, a6, t5 +; RV32I-NEXT: sub a5, a5, a7 +; RV32I-NEXT: sub a6, a6, t5 ; RV32I-NEXT: sub a4, a4, a1 -; RV32I-NEXT: sub a6, a7, t0 -; RV32I-NEXT: sltu a7, a5, t5 -; RV32I-NEXT: sub a1, a5, t5 -; RV32I-NEXT: sub a5, a4, t4 -; RV32I-NEXT: sub a4, a6, a7 +; RV32I-NEXT: sub a1, a4, t4 ; RV32I-NEXT: sub a2, a3, a2 ; RV32I-NEXT: j .LBB22_11 ; RV32I-NEXT: .LBB22_10: -; RV32I-NEXT: sub a7, t1, a7 -; RV32I-NEXT: sub a5, a6, a5 -; RV32I-NEXT: sub a4, a1, a4 -; RV32I-NEXT: sub a6, a7, t0 -; RV32I-NEXT: sltu a7, a5, t3 -; RV32I-NEXT: sub a1, a5, t3 -; RV32I-NEXT: sub a5, a4, t2 -; RV32I-NEXT: sub a4, a6, a7 +; RV32I-NEXT: sub a7, t0, a7 +; RV32I-NEXT: sub a6, a5, a6 +; RV32I-NEXT: sub a5, a7, t1 +; RV32I-NEXT: sltu a7, a6, t3 +; RV32I-NEXT: sub a1, a1, a4 +; RV32I-NEXT: sub a5, a5, a7 +; RV32I-NEXT: sub a6, a6, t3 +; RV32I-NEXT: sub a1, a1, t2 ; RV32I-NEXT: sub a2, a2, a3 ; RV32I-NEXT: .LBB22_11: ; RV32I-NEXT: sw a2, 0(a0) -; RV32I-NEXT: sw a5, 4(a0) -; RV32I-NEXT: sw a1, 8(a0) -; RV32I-NEXT: sw a4, 12(a0) +; RV32I-NEXT: sw a1, 4(a0) +; RV32I-NEXT: sw a6, 8(a0) +; RV32I-NEXT: sw a5, 12(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_cmp_i128: @@ -1850,19 +1850,19 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_cmp_i128: ; RV32ZBB: # %bb.0: +; 
RV32ZBB-NEXT: lw a5, 8(a1) ; RV32ZBB-NEXT: lw a3, 0(a2) ; RV32ZBB-NEXT: lw a4, 4(a2) -; RV32ZBB-NEXT: lw a5, 8(a2) +; RV32ZBB-NEXT: lw a6, 8(a2) ; RV32ZBB-NEXT: lw a7, 12(a2) +; RV32ZBB-NEXT: lw t0, 12(a1) ; RV32ZBB-NEXT: lw a2, 0(a1) -; RV32ZBB-NEXT: lw a6, 8(a1) -; RV32ZBB-NEXT: lw t1, 12(a1) ; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu t0, a6, a5 -; RV32ZBB-NEXT: mv t4, t0 -; RV32ZBB-NEXT: beq t1, a7, .LBB22_2 +; RV32ZBB-NEXT: sltu t1, a5, a6 +; RV32ZBB-NEXT: mv t4, t1 +; RV32ZBB-NEXT: beq t0, a7, .LBB22_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t4, t1, a7 +; RV32ZBB-NEXT: sltu t4, t0, a7 ; RV32ZBB-NEXT: .LBB22_2: ; RV32ZBB-NEXT: sltu t2, a2, a3 ; RV32ZBB-NEXT: mv t3, t2 @@ -1870,8 +1870,8 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: # %bb.3: ; RV32ZBB-NEXT: sltu t3, a1, a4 ; RV32ZBB-NEXT: .LBB22_4: -; RV32ZBB-NEXT: xor t5, t1, a7 -; RV32ZBB-NEXT: xor t6, a6, a5 +; RV32ZBB-NEXT: xor t5, t0, a7 +; RV32ZBB-NEXT: xor t6, a5, a6 ; RV32ZBB-NEXT: or t5, t6, t5 ; RV32ZBB-NEXT: mv t6, t3 ; RV32ZBB-NEXT: beqz t5, .LBB22_6 @@ -1886,32 +1886,32 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: .LBB22_8: ; RV32ZBB-NEXT: bnez t6, .LBB22_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: sltu t0, a5, a6 -; RV32ZBB-NEXT: sub a7, a7, t1 -; RV32ZBB-NEXT: sub a5, a5, a6 +; RV32ZBB-NEXT: sltu t1, a6, a5 +; RV32ZBB-NEXT: sub a7, a7, t0 +; RV32ZBB-NEXT: sub a6, a6, a5 +; RV32ZBB-NEXT: sub a5, a7, t1 +; RV32ZBB-NEXT: sltu a7, a6, t5 +; RV32ZBB-NEXT: sub a5, a5, a7 +; RV32ZBB-NEXT: sub a6, a6, t5 ; RV32ZBB-NEXT: sub a4, a4, a1 -; RV32ZBB-NEXT: sub a6, a7, t0 -; RV32ZBB-NEXT: sltu a7, a5, t5 -; RV32ZBB-NEXT: sub a1, a5, t5 -; RV32ZBB-NEXT: sub a5, a4, t4 -; RV32ZBB-NEXT: sub a4, a6, a7 +; RV32ZBB-NEXT: sub a1, a4, t4 ; RV32ZBB-NEXT: sub a2, a3, a2 ; RV32ZBB-NEXT: j .LBB22_11 ; RV32ZBB-NEXT: .LBB22_10: -; RV32ZBB-NEXT: sub a7, t1, a7 -; RV32ZBB-NEXT: sub a5, a6, a5 -; RV32ZBB-NEXT: sub a4, a1, a4 -; RV32ZBB-NEXT: sub a6, a7, t0 
-; RV32ZBB-NEXT: sltu a7, a5, t3 -; RV32ZBB-NEXT: sub a1, a5, t3 -; RV32ZBB-NEXT: sub a5, a4, t2 -; RV32ZBB-NEXT: sub a4, a6, a7 +; RV32ZBB-NEXT: sub a7, t0, a7 +; RV32ZBB-NEXT: sub a6, a5, a6 +; RV32ZBB-NEXT: sub a5, a7, t1 +; RV32ZBB-NEXT: sltu a7, a6, t3 +; RV32ZBB-NEXT: sub a1, a1, a4 +; RV32ZBB-NEXT: sub a5, a5, a7 +; RV32ZBB-NEXT: sub a6, a6, t3 +; RV32ZBB-NEXT: sub a1, a1, t2 ; RV32ZBB-NEXT: sub a2, a2, a3 ; RV32ZBB-NEXT: .LBB22_11: ; RV32ZBB-NEXT: sw a2, 0(a0) -; RV32ZBB-NEXT: sw a5, 4(a0) -; RV32ZBB-NEXT: sw a1, 8(a0) -; RV32ZBB-NEXT: sw a4, 12(a0) +; RV32ZBB-NEXT: sw a1, 4(a0) +; RV32ZBB-NEXT: sw a6, 8(a0) +; RV32ZBB-NEXT: sw a5, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_cmp_i128: diff --git a/llvm/test/CodeGen/RISCV/abdu.ll b/llvm/test/CodeGen/RISCV/abdu.ll index 6ef172a6cd618..dc938482d0140 100644 --- a/llvm/test/CodeGen/RISCV/abdu.ll +++ b/llvm/test/CodeGen/RISCV/abdu.ll @@ -198,8 +198,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { ; RV64ZBB-LABEL: abd_ext_i16_i32: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: slli a1, a1, 32 -; RV64ZBB-NEXT: zext.h a0, a0 ; RV64ZBB-NEXT: srli a1, a1, 32 +; RV64ZBB-NEXT: zext.h a0, a0 ; RV64ZBB-NEXT: minu a2, a0, a1 ; RV64ZBB-NEXT: maxu a0, a0, a1 ; RV64ZBB-NEXT: sub a0, a0, a2 @@ -337,8 +337,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { ; RV64ZBB-LABEL: abd_ext_i32_i16: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: slli a0, a0, 32 -; RV64ZBB-NEXT: zext.h a1, a1 ; RV64ZBB-NEXT: srli a0, a0, 32 +; RV64ZBB-NEXT: zext.h a1, a1 ; RV64ZBB-NEXT: minu a2, a0, a1 ; RV64ZBB-NEXT: maxu a0, a0, a1 ; RV64ZBB-NEXT: sub a0, a0, a2 @@ -540,76 +540,76 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_ext_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a5, 4(a2) -; RV32I-NEXT: lw a6, 8(a2) -; RV32I-NEXT: lw a7, 12(a2) -; RV32I-NEXT: lw a2, 8(a1) -; RV32I-NEXT: lw a4, 12(a1) -; RV32I-NEXT: lw t0, 0(a1) +; RV32I-NEXT: 
lw a3, 0(a1) +; RV32I-NEXT: lw a4, 8(a1) +; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a7, 8(a2) +; RV32I-NEXT: lw t0, 0(a2) +; RV32I-NEXT: lw t1, 12(a2) +; RV32I-NEXT: lw a6, 4(a2) ; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu t1, a2, a6 -; RV32I-NEXT: sub a7, a4, a7 -; RV32I-NEXT: sltu t2, t0, a3 -; RV32I-NEXT: sub a7, a7, t1 +; RV32I-NEXT: sltu a2, a4, a7 +; RV32I-NEXT: sub t1, a5, t1 +; RV32I-NEXT: sltu t2, a3, t0 +; RV32I-NEXT: sub a2, t1, a2 ; RV32I-NEXT: mv t1, t2 -; RV32I-NEXT: beq a1, a5, .LBB11_2 +; RV32I-NEXT: beq a1, a6, .LBB11_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a1, a5 +; RV32I-NEXT: sltu t1, a1, a6 ; RV32I-NEXT: .LBB11_2: -; RV32I-NEXT: sub t3, a2, a6 -; RV32I-NEXT: sltu a6, t3, t1 -; RV32I-NEXT: sub a6, a7, a6 -; RV32I-NEXT: sub a7, t3, t1 -; RV32I-NEXT: beq a6, a4, .LBB11_4 +; RV32I-NEXT: sub a7, a4, a7 +; RV32I-NEXT: sltu t3, a7, t1 +; RV32I-NEXT: sub a2, a2, t3 +; RV32I-NEXT: sub a7, a7, t1 +; RV32I-NEXT: beq a2, a5, .LBB11_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t1, a4, a6 +; RV32I-NEXT: sltu t1, a5, a2 ; RV32I-NEXT: j .LBB11_5 ; RV32I-NEXT: .LBB11_4: -; RV32I-NEXT: sltu t1, a2, a7 +; RV32I-NEXT: sltu t1, a4, a7 ; RV32I-NEXT: .LBB11_5: -; RV32I-NEXT: sub a5, a1, a5 -; RV32I-NEXT: sub a5, a5, t2 -; RV32I-NEXT: sub a3, t0, a3 -; RV32I-NEXT: beq a5, a1, .LBB11_7 +; RV32I-NEXT: sub a6, a1, a6 +; RV32I-NEXT: sub a6, a6, t2 +; RV32I-NEXT: sub t0, a3, t0 +; RV32I-NEXT: beq a6, a1, .LBB11_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: sltu a1, a1, a5 +; RV32I-NEXT: sltu a1, a1, a6 ; RV32I-NEXT: j .LBB11_8 ; RV32I-NEXT: .LBB11_7: -; RV32I-NEXT: sltu a1, t0, a3 +; RV32I-NEXT: sltu a1, a3, t0 ; RV32I-NEXT: .LBB11_8: -; RV32I-NEXT: xor a4, a6, a4 -; RV32I-NEXT: xor a2, a7, a2 -; RV32I-NEXT: or a2, a2, a4 -; RV32I-NEXT: beqz a2, .LBB11_10 +; RV32I-NEXT: xor a5, a2, a5 +; RV32I-NEXT: xor a3, a7, a4 +; RV32I-NEXT: or a3, a3, a5 +; RV32I-NEXT: beqz a3, .LBB11_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: mv a1, t1 ; RV32I-NEXT: .LBB11_10: -; RV32I-NEXT: 
neg t0, a1 -; RV32I-NEXT: xor a2, a7, t0 -; RV32I-NEXT: xor a6, a6, t0 -; RV32I-NEXT: xor a4, a3, t0 -; RV32I-NEXT: sltu a3, a2, t0 -; RV32I-NEXT: add a7, a6, a1 -; RV32I-NEXT: sltu a6, a4, t0 -; RV32I-NEXT: sub a3, a7, a3 -; RV32I-NEXT: xor t1, a5, t0 -; RV32I-NEXT: mv a7, a6 -; RV32I-NEXT: beqz a5, .LBB11_12 +; RV32I-NEXT: neg t1, a1 +; RV32I-NEXT: xor a3, a7, t1 +; RV32I-NEXT: xor a4, a2, t1 +; RV32I-NEXT: xor a2, t0, t1 +; RV32I-NEXT: sltu a7, a3, t1 +; RV32I-NEXT: add a4, a4, a1 +; RV32I-NEXT: sltu a5, a2, t1 +; RV32I-NEXT: sub a4, a4, a7 +; RV32I-NEXT: xor a7, a6, t1 +; RV32I-NEXT: mv t0, a5 +; RV32I-NEXT: beqz a6, .LBB11_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sltu a7, t1, t0 +; RV32I-NEXT: sltu t0, a7, t1 ; RV32I-NEXT: .LBB11_12: -; RV32I-NEXT: add a2, a2, a1 -; RV32I-NEXT: add t1, t1, a1 -; RV32I-NEXT: add a1, a4, a1 -; RV32I-NEXT: sltu a4, a2, a7 -; RV32I-NEXT: sub a2, a2, a7 -; RV32I-NEXT: sub a5, t1, a6 -; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: add a3, a3, a1 +; RV32I-NEXT: add a7, a7, a1 +; RV32I-NEXT: sltu a6, a3, t0 +; RV32I-NEXT: sub a3, a3, t0 +; RV32I-NEXT: sub a5, a7, a5 +; RV32I-NEXT: sub a4, a4, a6 +; RV32I-NEXT: add a1, a2, a1 ; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a5, 4(a0) -; RV32I-NEXT: sw a2, 8(a0) -; RV32I-NEXT: sw a3, 12(a0) +; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: sw a4, 12(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_ext_i128: @@ -636,76 +636,76 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_ext_i128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a5, 4(a2) -; RV32ZBB-NEXT: lw a6, 8(a2) -; RV32ZBB-NEXT: lw a7, 12(a2) -; RV32ZBB-NEXT: lw a2, 8(a1) -; RV32ZBB-NEXT: lw a4, 12(a1) -; RV32ZBB-NEXT: lw t0, 0(a1) +; RV32ZBB-NEXT: lw a3, 0(a1) +; RV32ZBB-NEXT: lw a4, 8(a1) +; RV32ZBB-NEXT: lw a5, 12(a1) +; RV32ZBB-NEXT: lw a7, 8(a2) +; RV32ZBB-NEXT: lw t0, 0(a2) +; RV32ZBB-NEXT: lw t1, 12(a2) +; RV32ZBB-NEXT: lw a6, 4(a2) ; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu t1, 
a2, a6 -; RV32ZBB-NEXT: sub a7, a4, a7 -; RV32ZBB-NEXT: sltu t2, t0, a3 -; RV32ZBB-NEXT: sub a7, a7, t1 +; RV32ZBB-NEXT: sltu a2, a4, a7 +; RV32ZBB-NEXT: sub t1, a5, t1 +; RV32ZBB-NEXT: sltu t2, a3, t0 +; RV32ZBB-NEXT: sub a2, t1, a2 ; RV32ZBB-NEXT: mv t1, t2 -; RV32ZBB-NEXT: beq a1, a5, .LBB11_2 +; RV32ZBB-NEXT: beq a1, a6, .LBB11_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a1, a5 +; RV32ZBB-NEXT: sltu t1, a1, a6 ; RV32ZBB-NEXT: .LBB11_2: -; RV32ZBB-NEXT: sub t3, a2, a6 -; RV32ZBB-NEXT: sltu a6, t3, t1 -; RV32ZBB-NEXT: sub a6, a7, a6 -; RV32ZBB-NEXT: sub a7, t3, t1 -; RV32ZBB-NEXT: beq a6, a4, .LBB11_4 +; RV32ZBB-NEXT: sub a7, a4, a7 +; RV32ZBB-NEXT: sltu t3, a7, t1 +; RV32ZBB-NEXT: sub a2, a2, t3 +; RV32ZBB-NEXT: sub a7, a7, t1 +; RV32ZBB-NEXT: beq a2, a5, .LBB11_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t1, a4, a6 +; RV32ZBB-NEXT: sltu t1, a5, a2 ; RV32ZBB-NEXT: j .LBB11_5 ; RV32ZBB-NEXT: .LBB11_4: -; RV32ZBB-NEXT: sltu t1, a2, a7 +; RV32ZBB-NEXT: sltu t1, a4, a7 ; RV32ZBB-NEXT: .LBB11_5: -; RV32ZBB-NEXT: sub a5, a1, a5 -; RV32ZBB-NEXT: sub a5, a5, t2 -; RV32ZBB-NEXT: sub a3, t0, a3 -; RV32ZBB-NEXT: beq a5, a1, .LBB11_7 +; RV32ZBB-NEXT: sub a6, a1, a6 +; RV32ZBB-NEXT: sub a6, a6, t2 +; RV32ZBB-NEXT: sub t0, a3, t0 +; RV32ZBB-NEXT: beq a6, a1, .LBB11_7 ; RV32ZBB-NEXT: # %bb.6: -; RV32ZBB-NEXT: sltu a1, a1, a5 +; RV32ZBB-NEXT: sltu a1, a1, a6 ; RV32ZBB-NEXT: j .LBB11_8 ; RV32ZBB-NEXT: .LBB11_7: -; RV32ZBB-NEXT: sltu a1, t0, a3 +; RV32ZBB-NEXT: sltu a1, a3, t0 ; RV32ZBB-NEXT: .LBB11_8: -; RV32ZBB-NEXT: xor a4, a6, a4 -; RV32ZBB-NEXT: xor a2, a7, a2 -; RV32ZBB-NEXT: or a2, a2, a4 -; RV32ZBB-NEXT: beqz a2, .LBB11_10 +; RV32ZBB-NEXT: xor a5, a2, a5 +; RV32ZBB-NEXT: xor a3, a7, a4 +; RV32ZBB-NEXT: or a3, a3, a5 +; RV32ZBB-NEXT: beqz a3, .LBB11_10 ; RV32ZBB-NEXT: # %bb.9: ; RV32ZBB-NEXT: mv a1, t1 ; RV32ZBB-NEXT: .LBB11_10: -; RV32ZBB-NEXT: neg t0, a1 -; RV32ZBB-NEXT: xor a2, a7, t0 -; RV32ZBB-NEXT: xor a6, a6, t0 -; RV32ZBB-NEXT: xor a4, a3, t0 -; 
RV32ZBB-NEXT: sltu a3, a2, t0 -; RV32ZBB-NEXT: add a7, a6, a1 -; RV32ZBB-NEXT: sltu a6, a4, t0 -; RV32ZBB-NEXT: sub a3, a7, a3 -; RV32ZBB-NEXT: xor t1, a5, t0 -; RV32ZBB-NEXT: mv a7, a6 -; RV32ZBB-NEXT: beqz a5, .LBB11_12 +; RV32ZBB-NEXT: neg t1, a1 +; RV32ZBB-NEXT: xor a3, a7, t1 +; RV32ZBB-NEXT: xor a4, a2, t1 +; RV32ZBB-NEXT: xor a2, t0, t1 +; RV32ZBB-NEXT: sltu a7, a3, t1 +; RV32ZBB-NEXT: add a4, a4, a1 +; RV32ZBB-NEXT: sltu a5, a2, t1 +; RV32ZBB-NEXT: sub a4, a4, a7 +; RV32ZBB-NEXT: xor a7, a6, t1 +; RV32ZBB-NEXT: mv t0, a5 +; RV32ZBB-NEXT: beqz a6, .LBB11_12 ; RV32ZBB-NEXT: # %bb.11: -; RV32ZBB-NEXT: sltu a7, t1, t0 +; RV32ZBB-NEXT: sltu t0, a7, t1 ; RV32ZBB-NEXT: .LBB11_12: -; RV32ZBB-NEXT: add a2, a2, a1 -; RV32ZBB-NEXT: add t1, t1, a1 -; RV32ZBB-NEXT: add a1, a4, a1 -; RV32ZBB-NEXT: sltu a4, a2, a7 -; RV32ZBB-NEXT: sub a2, a2, a7 -; RV32ZBB-NEXT: sub a5, t1, a6 -; RV32ZBB-NEXT: sub a3, a3, a4 +; RV32ZBB-NEXT: add a3, a3, a1 +; RV32ZBB-NEXT: add a7, a7, a1 +; RV32ZBB-NEXT: sltu a6, a3, t0 +; RV32ZBB-NEXT: sub a3, a3, t0 +; RV32ZBB-NEXT: sub a5, a7, a5 +; RV32ZBB-NEXT: sub a4, a4, a6 +; RV32ZBB-NEXT: add a1, a2, a1 ; RV32ZBB-NEXT: sw a1, 0(a0) ; RV32ZBB-NEXT: sw a5, 4(a0) -; RV32ZBB-NEXT: sw a2, 8(a0) -; RV32ZBB-NEXT: sw a3, 12(a0) +; RV32ZBB-NEXT: sw a3, 8(a0) +; RV32ZBB-NEXT: sw a4, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_ext_i128: @@ -740,76 +740,76 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_ext_i128_undef: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a5, 4(a2) -; RV32I-NEXT: lw a6, 8(a2) -; RV32I-NEXT: lw a7, 12(a2) -; RV32I-NEXT: lw a2, 8(a1) -; RV32I-NEXT: lw a4, 12(a1) -; RV32I-NEXT: lw t0, 0(a1) +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a4, 8(a1) +; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a7, 8(a2) +; RV32I-NEXT: lw t0, 0(a2) +; RV32I-NEXT: lw t1, 12(a2) +; RV32I-NEXT: lw a6, 4(a2) ; RV32I-NEXT: lw a1, 4(a1) -; 
RV32I-NEXT: sltu t1, a2, a6 -; RV32I-NEXT: sub a7, a4, a7 -; RV32I-NEXT: sltu t2, t0, a3 -; RV32I-NEXT: sub a7, a7, t1 +; RV32I-NEXT: sltu a2, a4, a7 +; RV32I-NEXT: sub t1, a5, t1 +; RV32I-NEXT: sltu t2, a3, t0 +; RV32I-NEXT: sub a2, t1, a2 ; RV32I-NEXT: mv t1, t2 -; RV32I-NEXT: beq a1, a5, .LBB12_2 +; RV32I-NEXT: beq a1, a6, .LBB12_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a1, a5 +; RV32I-NEXT: sltu t1, a1, a6 ; RV32I-NEXT: .LBB12_2: -; RV32I-NEXT: sub t3, a2, a6 -; RV32I-NEXT: sltu a6, t3, t1 -; RV32I-NEXT: sub a6, a7, a6 -; RV32I-NEXT: sub a7, t3, t1 -; RV32I-NEXT: beq a6, a4, .LBB12_4 +; RV32I-NEXT: sub a7, a4, a7 +; RV32I-NEXT: sltu t3, a7, t1 +; RV32I-NEXT: sub a2, a2, t3 +; RV32I-NEXT: sub a7, a7, t1 +; RV32I-NEXT: beq a2, a5, .LBB12_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t1, a4, a6 +; RV32I-NEXT: sltu t1, a5, a2 ; RV32I-NEXT: j .LBB12_5 ; RV32I-NEXT: .LBB12_4: -; RV32I-NEXT: sltu t1, a2, a7 +; RV32I-NEXT: sltu t1, a4, a7 ; RV32I-NEXT: .LBB12_5: -; RV32I-NEXT: sub a5, a1, a5 -; RV32I-NEXT: sub a5, a5, t2 -; RV32I-NEXT: sub a3, t0, a3 -; RV32I-NEXT: beq a5, a1, .LBB12_7 +; RV32I-NEXT: sub a6, a1, a6 +; RV32I-NEXT: sub a6, a6, t2 +; RV32I-NEXT: sub t0, a3, t0 +; RV32I-NEXT: beq a6, a1, .LBB12_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: sltu a1, a1, a5 +; RV32I-NEXT: sltu a1, a1, a6 ; RV32I-NEXT: j .LBB12_8 ; RV32I-NEXT: .LBB12_7: -; RV32I-NEXT: sltu a1, t0, a3 +; RV32I-NEXT: sltu a1, a3, t0 ; RV32I-NEXT: .LBB12_8: -; RV32I-NEXT: xor a4, a6, a4 -; RV32I-NEXT: xor a2, a7, a2 -; RV32I-NEXT: or a2, a2, a4 -; RV32I-NEXT: beqz a2, .LBB12_10 +; RV32I-NEXT: xor a5, a2, a5 +; RV32I-NEXT: xor a3, a7, a4 +; RV32I-NEXT: or a3, a3, a5 +; RV32I-NEXT: beqz a3, .LBB12_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: mv a1, t1 ; RV32I-NEXT: .LBB12_10: -; RV32I-NEXT: neg t0, a1 -; RV32I-NEXT: xor a2, a7, t0 -; RV32I-NEXT: xor a6, a6, t0 -; RV32I-NEXT: xor a4, a3, t0 -; RV32I-NEXT: sltu a3, a2, t0 -; RV32I-NEXT: add a7, a6, a1 -; RV32I-NEXT: sltu a6, a4, t0 -; RV32I-NEXT: sub 
a3, a7, a3 -; RV32I-NEXT: xor t1, a5, t0 -; RV32I-NEXT: mv a7, a6 -; RV32I-NEXT: beqz a5, .LBB12_12 +; RV32I-NEXT: neg t1, a1 +; RV32I-NEXT: xor a3, a7, t1 +; RV32I-NEXT: xor a4, a2, t1 +; RV32I-NEXT: xor a2, t0, t1 +; RV32I-NEXT: sltu a7, a3, t1 +; RV32I-NEXT: add a4, a4, a1 +; RV32I-NEXT: sltu a5, a2, t1 +; RV32I-NEXT: sub a4, a4, a7 +; RV32I-NEXT: xor a7, a6, t1 +; RV32I-NEXT: mv t0, a5 +; RV32I-NEXT: beqz a6, .LBB12_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sltu a7, t1, t0 +; RV32I-NEXT: sltu t0, a7, t1 ; RV32I-NEXT: .LBB12_12: -; RV32I-NEXT: add a2, a2, a1 -; RV32I-NEXT: add t1, t1, a1 -; RV32I-NEXT: add a1, a4, a1 -; RV32I-NEXT: sltu a4, a2, a7 -; RV32I-NEXT: sub a2, a2, a7 -; RV32I-NEXT: sub a5, t1, a6 -; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: add a3, a3, a1 +; RV32I-NEXT: add a7, a7, a1 +; RV32I-NEXT: sltu a6, a3, t0 +; RV32I-NEXT: sub a3, a3, t0 +; RV32I-NEXT: sub a5, a7, a5 +; RV32I-NEXT: sub a4, a4, a6 +; RV32I-NEXT: add a1, a2, a1 ; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a5, 4(a0) -; RV32I-NEXT: sw a2, 8(a0) -; RV32I-NEXT: sw a3, 12(a0) +; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: sw a4, 12(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_ext_i128_undef: @@ -836,76 +836,76 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_ext_i128_undef: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a5, 4(a2) -; RV32ZBB-NEXT: lw a6, 8(a2) -; RV32ZBB-NEXT: lw a7, 12(a2) -; RV32ZBB-NEXT: lw a2, 8(a1) -; RV32ZBB-NEXT: lw a4, 12(a1) -; RV32ZBB-NEXT: lw t0, 0(a1) +; RV32ZBB-NEXT: lw a3, 0(a1) +; RV32ZBB-NEXT: lw a4, 8(a1) +; RV32ZBB-NEXT: lw a5, 12(a1) +; RV32ZBB-NEXT: lw a7, 8(a2) +; RV32ZBB-NEXT: lw t0, 0(a2) +; RV32ZBB-NEXT: lw t1, 12(a2) +; RV32ZBB-NEXT: lw a6, 4(a2) ; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu t1, a2, a6 -; RV32ZBB-NEXT: sub a7, a4, a7 -; RV32ZBB-NEXT: sltu t2, t0, a3 -; RV32ZBB-NEXT: sub a7, a7, t1 +; RV32ZBB-NEXT: sltu a2, a4, a7 +; RV32ZBB-NEXT: sub t1, a5, t1 +; RV32ZBB-NEXT: sltu t2, 
a3, t0 +; RV32ZBB-NEXT: sub a2, t1, a2 ; RV32ZBB-NEXT: mv t1, t2 -; RV32ZBB-NEXT: beq a1, a5, .LBB12_2 +; RV32ZBB-NEXT: beq a1, a6, .LBB12_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a1, a5 +; RV32ZBB-NEXT: sltu t1, a1, a6 ; RV32ZBB-NEXT: .LBB12_2: -; RV32ZBB-NEXT: sub t3, a2, a6 -; RV32ZBB-NEXT: sltu a6, t3, t1 -; RV32ZBB-NEXT: sub a6, a7, a6 -; RV32ZBB-NEXT: sub a7, t3, t1 -; RV32ZBB-NEXT: beq a6, a4, .LBB12_4 +; RV32ZBB-NEXT: sub a7, a4, a7 +; RV32ZBB-NEXT: sltu t3, a7, t1 +; RV32ZBB-NEXT: sub a2, a2, t3 +; RV32ZBB-NEXT: sub a7, a7, t1 +; RV32ZBB-NEXT: beq a2, a5, .LBB12_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t1, a4, a6 +; RV32ZBB-NEXT: sltu t1, a5, a2 ; RV32ZBB-NEXT: j .LBB12_5 ; RV32ZBB-NEXT: .LBB12_4: -; RV32ZBB-NEXT: sltu t1, a2, a7 +; RV32ZBB-NEXT: sltu t1, a4, a7 ; RV32ZBB-NEXT: .LBB12_5: -; RV32ZBB-NEXT: sub a5, a1, a5 -; RV32ZBB-NEXT: sub a5, a5, t2 -; RV32ZBB-NEXT: sub a3, t0, a3 -; RV32ZBB-NEXT: beq a5, a1, .LBB12_7 +; RV32ZBB-NEXT: sub a6, a1, a6 +; RV32ZBB-NEXT: sub a6, a6, t2 +; RV32ZBB-NEXT: sub t0, a3, t0 +; RV32ZBB-NEXT: beq a6, a1, .LBB12_7 ; RV32ZBB-NEXT: # %bb.6: -; RV32ZBB-NEXT: sltu a1, a1, a5 +; RV32ZBB-NEXT: sltu a1, a1, a6 ; RV32ZBB-NEXT: j .LBB12_8 ; RV32ZBB-NEXT: .LBB12_7: -; RV32ZBB-NEXT: sltu a1, t0, a3 +; RV32ZBB-NEXT: sltu a1, a3, t0 ; RV32ZBB-NEXT: .LBB12_8: -; RV32ZBB-NEXT: xor a4, a6, a4 -; RV32ZBB-NEXT: xor a2, a7, a2 -; RV32ZBB-NEXT: or a2, a2, a4 -; RV32ZBB-NEXT: beqz a2, .LBB12_10 +; RV32ZBB-NEXT: xor a5, a2, a5 +; RV32ZBB-NEXT: xor a3, a7, a4 +; RV32ZBB-NEXT: or a3, a3, a5 +; RV32ZBB-NEXT: beqz a3, .LBB12_10 ; RV32ZBB-NEXT: # %bb.9: ; RV32ZBB-NEXT: mv a1, t1 ; RV32ZBB-NEXT: .LBB12_10: -; RV32ZBB-NEXT: neg t0, a1 -; RV32ZBB-NEXT: xor a2, a7, t0 -; RV32ZBB-NEXT: xor a6, a6, t0 -; RV32ZBB-NEXT: xor a4, a3, t0 -; RV32ZBB-NEXT: sltu a3, a2, t0 -; RV32ZBB-NEXT: add a7, a6, a1 -; RV32ZBB-NEXT: sltu a6, a4, t0 -; RV32ZBB-NEXT: sub a3, a7, a3 -; RV32ZBB-NEXT: xor t1, a5, t0 -; RV32ZBB-NEXT: mv a7, a6 -; 
RV32ZBB-NEXT: beqz a5, .LBB12_12 +; RV32ZBB-NEXT: neg t1, a1 +; RV32ZBB-NEXT: xor a3, a7, t1 +; RV32ZBB-NEXT: xor a4, a2, t1 +; RV32ZBB-NEXT: xor a2, t0, t1 +; RV32ZBB-NEXT: sltu a7, a3, t1 +; RV32ZBB-NEXT: add a4, a4, a1 +; RV32ZBB-NEXT: sltu a5, a2, t1 +; RV32ZBB-NEXT: sub a4, a4, a7 +; RV32ZBB-NEXT: xor a7, a6, t1 +; RV32ZBB-NEXT: mv t0, a5 +; RV32ZBB-NEXT: beqz a6, .LBB12_12 ; RV32ZBB-NEXT: # %bb.11: -; RV32ZBB-NEXT: sltu a7, t1, t0 +; RV32ZBB-NEXT: sltu t0, a7, t1 ; RV32ZBB-NEXT: .LBB12_12: -; RV32ZBB-NEXT: add a2, a2, a1 -; RV32ZBB-NEXT: add t1, t1, a1 -; RV32ZBB-NEXT: add a1, a4, a1 -; RV32ZBB-NEXT: sltu a4, a2, a7 -; RV32ZBB-NEXT: sub a2, a2, a7 -; RV32ZBB-NEXT: sub a5, t1, a6 -; RV32ZBB-NEXT: sub a3, a3, a4 +; RV32ZBB-NEXT: add a3, a3, a1 +; RV32ZBB-NEXT: add a7, a7, a1 +; RV32ZBB-NEXT: sltu a6, a3, t0 +; RV32ZBB-NEXT: sub a3, a3, t0 +; RV32ZBB-NEXT: sub a5, a7, a5 +; RV32ZBB-NEXT: sub a4, a4, a6 +; RV32ZBB-NEXT: add a1, a2, a1 ; RV32ZBB-NEXT: sw a1, 0(a0) ; RV32ZBB-NEXT: sw a5, 4(a0) -; RV32ZBB-NEXT: sw a2, 8(a0) -; RV32ZBB-NEXT: sw a3, 12(a0) +; RV32ZBB-NEXT: sw a3, 8(a0) +; RV32ZBB-NEXT: sw a4, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_ext_i128_undef: @@ -1131,76 +1131,76 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_minmax_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a5, 4(a2) -; RV32I-NEXT: lw a6, 8(a2) -; RV32I-NEXT: lw a7, 12(a2) -; RV32I-NEXT: lw a2, 8(a1) -; RV32I-NEXT: lw a4, 12(a1) -; RV32I-NEXT: lw t0, 0(a1) +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a4, 8(a1) +; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a7, 8(a2) +; RV32I-NEXT: lw t0, 0(a2) +; RV32I-NEXT: lw t1, 12(a2) +; RV32I-NEXT: lw a6, 4(a2) ; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu t1, a2, a6 -; RV32I-NEXT: sub a7, a4, a7 -; RV32I-NEXT: sltu t2, t0, a3 -; RV32I-NEXT: sub a7, a7, t1 +; RV32I-NEXT: sltu a2, a4, a7 +; RV32I-NEXT: sub t1, a5, t1 +; RV32I-NEXT: 
sltu t2, a3, t0 +; RV32I-NEXT: sub a2, t1, a2 ; RV32I-NEXT: mv t1, t2 -; RV32I-NEXT: beq a1, a5, .LBB17_2 +; RV32I-NEXT: beq a1, a6, .LBB17_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a1, a5 +; RV32I-NEXT: sltu t1, a1, a6 ; RV32I-NEXT: .LBB17_2: -; RV32I-NEXT: sub t3, a2, a6 -; RV32I-NEXT: sltu a6, t3, t1 -; RV32I-NEXT: sub a6, a7, a6 -; RV32I-NEXT: sub a7, t3, t1 -; RV32I-NEXT: beq a6, a4, .LBB17_4 +; RV32I-NEXT: sub a7, a4, a7 +; RV32I-NEXT: sltu t3, a7, t1 +; RV32I-NEXT: sub a2, a2, t3 +; RV32I-NEXT: sub a7, a7, t1 +; RV32I-NEXT: beq a2, a5, .LBB17_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t1, a4, a6 +; RV32I-NEXT: sltu t1, a5, a2 ; RV32I-NEXT: j .LBB17_5 ; RV32I-NEXT: .LBB17_4: -; RV32I-NEXT: sltu t1, a2, a7 +; RV32I-NEXT: sltu t1, a4, a7 ; RV32I-NEXT: .LBB17_5: -; RV32I-NEXT: sub a5, a1, a5 -; RV32I-NEXT: sub a5, a5, t2 -; RV32I-NEXT: sub a3, t0, a3 -; RV32I-NEXT: beq a5, a1, .LBB17_7 +; RV32I-NEXT: sub a6, a1, a6 +; RV32I-NEXT: sub a6, a6, t2 +; RV32I-NEXT: sub t0, a3, t0 +; RV32I-NEXT: beq a6, a1, .LBB17_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: sltu a1, a1, a5 +; RV32I-NEXT: sltu a1, a1, a6 ; RV32I-NEXT: j .LBB17_8 ; RV32I-NEXT: .LBB17_7: -; RV32I-NEXT: sltu a1, t0, a3 +; RV32I-NEXT: sltu a1, a3, t0 ; RV32I-NEXT: .LBB17_8: -; RV32I-NEXT: xor a4, a6, a4 -; RV32I-NEXT: xor a2, a7, a2 -; RV32I-NEXT: or a2, a2, a4 -; RV32I-NEXT: beqz a2, .LBB17_10 +; RV32I-NEXT: xor a5, a2, a5 +; RV32I-NEXT: xor a3, a7, a4 +; RV32I-NEXT: or a3, a3, a5 +; RV32I-NEXT: beqz a3, .LBB17_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: mv a1, t1 ; RV32I-NEXT: .LBB17_10: -; RV32I-NEXT: neg t0, a1 -; RV32I-NEXT: xor a2, a7, t0 -; RV32I-NEXT: xor a6, a6, t0 -; RV32I-NEXT: xor a4, a3, t0 -; RV32I-NEXT: sltu a3, a2, t0 -; RV32I-NEXT: add a7, a6, a1 -; RV32I-NEXT: sltu a6, a4, t0 -; RV32I-NEXT: sub a3, a7, a3 -; RV32I-NEXT: xor t1, a5, t0 -; RV32I-NEXT: mv a7, a6 -; RV32I-NEXT: beqz a5, .LBB17_12 +; RV32I-NEXT: neg t1, a1 +; RV32I-NEXT: xor a3, a7, t1 +; RV32I-NEXT: xor a4, a2, t1 +; 
RV32I-NEXT: xor a2, t0, t1 +; RV32I-NEXT: sltu a7, a3, t1 +; RV32I-NEXT: add a4, a4, a1 +; RV32I-NEXT: sltu a5, a2, t1 +; RV32I-NEXT: sub a4, a4, a7 +; RV32I-NEXT: xor a7, a6, t1 +; RV32I-NEXT: mv t0, a5 +; RV32I-NEXT: beqz a6, .LBB17_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sltu a7, t1, t0 +; RV32I-NEXT: sltu t0, a7, t1 ; RV32I-NEXT: .LBB17_12: -; RV32I-NEXT: add a2, a2, a1 -; RV32I-NEXT: add t1, t1, a1 -; RV32I-NEXT: add a1, a4, a1 -; RV32I-NEXT: sltu a4, a2, a7 -; RV32I-NEXT: sub a2, a2, a7 -; RV32I-NEXT: sub a5, t1, a6 -; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: add a3, a3, a1 +; RV32I-NEXT: add a7, a7, a1 +; RV32I-NEXT: sltu a6, a3, t0 +; RV32I-NEXT: sub a3, a3, t0 +; RV32I-NEXT: sub a5, a7, a5 +; RV32I-NEXT: sub a4, a4, a6 +; RV32I-NEXT: add a1, a2, a1 ; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a5, 4(a0) -; RV32I-NEXT: sw a2, 8(a0) -; RV32I-NEXT: sw a3, 12(a0) +; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: sw a4, 12(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_minmax_i128: @@ -1227,76 +1227,76 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_minmax_i128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a5, 4(a2) -; RV32ZBB-NEXT: lw a6, 8(a2) -; RV32ZBB-NEXT: lw a7, 12(a2) -; RV32ZBB-NEXT: lw a2, 8(a1) -; RV32ZBB-NEXT: lw a4, 12(a1) -; RV32ZBB-NEXT: lw t0, 0(a1) +; RV32ZBB-NEXT: lw a3, 0(a1) +; RV32ZBB-NEXT: lw a4, 8(a1) +; RV32ZBB-NEXT: lw a5, 12(a1) +; RV32ZBB-NEXT: lw a7, 8(a2) +; RV32ZBB-NEXT: lw t0, 0(a2) +; RV32ZBB-NEXT: lw t1, 12(a2) +; RV32ZBB-NEXT: lw a6, 4(a2) ; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu t1, a2, a6 -; RV32ZBB-NEXT: sub a7, a4, a7 -; RV32ZBB-NEXT: sltu t2, t0, a3 -; RV32ZBB-NEXT: sub a7, a7, t1 +; RV32ZBB-NEXT: sltu a2, a4, a7 +; RV32ZBB-NEXT: sub t1, a5, t1 +; RV32ZBB-NEXT: sltu t2, a3, t0 +; RV32ZBB-NEXT: sub a2, t1, a2 ; RV32ZBB-NEXT: mv t1, t2 -; RV32ZBB-NEXT: beq a1, a5, .LBB17_2 +; RV32ZBB-NEXT: beq a1, a6, .LBB17_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a1, 
a5 +; RV32ZBB-NEXT: sltu t1, a1, a6 ; RV32ZBB-NEXT: .LBB17_2: -; RV32ZBB-NEXT: sub t3, a2, a6 -; RV32ZBB-NEXT: sltu a6, t3, t1 -; RV32ZBB-NEXT: sub a6, a7, a6 -; RV32ZBB-NEXT: sub a7, t3, t1 -; RV32ZBB-NEXT: beq a6, a4, .LBB17_4 +; RV32ZBB-NEXT: sub a7, a4, a7 +; RV32ZBB-NEXT: sltu t3, a7, t1 +; RV32ZBB-NEXT: sub a2, a2, t3 +; RV32ZBB-NEXT: sub a7, a7, t1 +; RV32ZBB-NEXT: beq a2, a5, .LBB17_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t1, a4, a6 +; RV32ZBB-NEXT: sltu t1, a5, a2 ; RV32ZBB-NEXT: j .LBB17_5 ; RV32ZBB-NEXT: .LBB17_4: -; RV32ZBB-NEXT: sltu t1, a2, a7 +; RV32ZBB-NEXT: sltu t1, a4, a7 ; RV32ZBB-NEXT: .LBB17_5: -; RV32ZBB-NEXT: sub a5, a1, a5 -; RV32ZBB-NEXT: sub a5, a5, t2 -; RV32ZBB-NEXT: sub a3, t0, a3 -; RV32ZBB-NEXT: beq a5, a1, .LBB17_7 +; RV32ZBB-NEXT: sub a6, a1, a6 +; RV32ZBB-NEXT: sub a6, a6, t2 +; RV32ZBB-NEXT: sub t0, a3, t0 +; RV32ZBB-NEXT: beq a6, a1, .LBB17_7 ; RV32ZBB-NEXT: # %bb.6: -; RV32ZBB-NEXT: sltu a1, a1, a5 +; RV32ZBB-NEXT: sltu a1, a1, a6 ; RV32ZBB-NEXT: j .LBB17_8 ; RV32ZBB-NEXT: .LBB17_7: -; RV32ZBB-NEXT: sltu a1, t0, a3 +; RV32ZBB-NEXT: sltu a1, a3, t0 ; RV32ZBB-NEXT: .LBB17_8: -; RV32ZBB-NEXT: xor a4, a6, a4 -; RV32ZBB-NEXT: xor a2, a7, a2 -; RV32ZBB-NEXT: or a2, a2, a4 -; RV32ZBB-NEXT: beqz a2, .LBB17_10 +; RV32ZBB-NEXT: xor a5, a2, a5 +; RV32ZBB-NEXT: xor a3, a7, a4 +; RV32ZBB-NEXT: or a3, a3, a5 +; RV32ZBB-NEXT: beqz a3, .LBB17_10 ; RV32ZBB-NEXT: # %bb.9: ; RV32ZBB-NEXT: mv a1, t1 ; RV32ZBB-NEXT: .LBB17_10: -; RV32ZBB-NEXT: neg t0, a1 -; RV32ZBB-NEXT: xor a2, a7, t0 -; RV32ZBB-NEXT: xor a6, a6, t0 -; RV32ZBB-NEXT: xor a4, a3, t0 -; RV32ZBB-NEXT: sltu a3, a2, t0 -; RV32ZBB-NEXT: add a7, a6, a1 -; RV32ZBB-NEXT: sltu a6, a4, t0 -; RV32ZBB-NEXT: sub a3, a7, a3 -; RV32ZBB-NEXT: xor t1, a5, t0 -; RV32ZBB-NEXT: mv a7, a6 -; RV32ZBB-NEXT: beqz a5, .LBB17_12 +; RV32ZBB-NEXT: neg t1, a1 +; RV32ZBB-NEXT: xor a3, a7, t1 +; RV32ZBB-NEXT: xor a4, a2, t1 +; RV32ZBB-NEXT: xor a2, t0, t1 +; RV32ZBB-NEXT: sltu a7, a3, t1 +; RV32ZBB-NEXT: 
add a4, a4, a1 +; RV32ZBB-NEXT: sltu a5, a2, t1 +; RV32ZBB-NEXT: sub a4, a4, a7 +; RV32ZBB-NEXT: xor a7, a6, t1 +; RV32ZBB-NEXT: mv t0, a5 +; RV32ZBB-NEXT: beqz a6, .LBB17_12 ; RV32ZBB-NEXT: # %bb.11: -; RV32ZBB-NEXT: sltu a7, t1, t0 +; RV32ZBB-NEXT: sltu t0, a7, t1 ; RV32ZBB-NEXT: .LBB17_12: -; RV32ZBB-NEXT: add a2, a2, a1 -; RV32ZBB-NEXT: add t1, t1, a1 -; RV32ZBB-NEXT: add a1, a4, a1 -; RV32ZBB-NEXT: sltu a4, a2, a7 -; RV32ZBB-NEXT: sub a2, a2, a7 -; RV32ZBB-NEXT: sub a5, t1, a6 -; RV32ZBB-NEXT: sub a3, a3, a4 +; RV32ZBB-NEXT: add a3, a3, a1 +; RV32ZBB-NEXT: add a7, a7, a1 +; RV32ZBB-NEXT: sltu a6, a3, t0 +; RV32ZBB-NEXT: sub a3, a3, t0 +; RV32ZBB-NEXT: sub a5, a7, a5 +; RV32ZBB-NEXT: sub a4, a4, a6 +; RV32ZBB-NEXT: add a1, a2, a1 ; RV32ZBB-NEXT: sw a1, 0(a0) ; RV32ZBB-NEXT: sw a5, 4(a0) -; RV32ZBB-NEXT: sw a2, 8(a0) -; RV32ZBB-NEXT: sw a3, 12(a0) +; RV32ZBB-NEXT: sw a3, 8(a0) +; RV32ZBB-NEXT: sw a4, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_minmax_i128: @@ -1524,76 +1524,76 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_cmp_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a5, 4(a2) -; RV32I-NEXT: lw a6, 8(a2) -; RV32I-NEXT: lw a7, 12(a2) -; RV32I-NEXT: lw a2, 8(a1) -; RV32I-NEXT: lw a4, 12(a1) -; RV32I-NEXT: lw t0, 0(a1) +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a4, 8(a1) +; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a7, 8(a2) +; RV32I-NEXT: lw t0, 0(a2) +; RV32I-NEXT: lw t1, 12(a2) +; RV32I-NEXT: lw a6, 4(a2) ; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu t1, a2, a6 -; RV32I-NEXT: sub a7, a4, a7 -; RV32I-NEXT: sltu t2, t0, a3 -; RV32I-NEXT: sub a7, a7, t1 +; RV32I-NEXT: sltu a2, a4, a7 +; RV32I-NEXT: sub t1, a5, t1 +; RV32I-NEXT: sltu t2, a3, t0 +; RV32I-NEXT: sub a2, t1, a2 ; RV32I-NEXT: mv t1, t2 -; RV32I-NEXT: beq a1, a5, .LBB22_2 +; RV32I-NEXT: beq a1, a6, .LBB22_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a1, a5 +; RV32I-NEXT: sltu t1, 
a1, a6 ; RV32I-NEXT: .LBB22_2: -; RV32I-NEXT: sub t3, a2, a6 -; RV32I-NEXT: sltu a6, t3, t1 -; RV32I-NEXT: sub a6, a7, a6 -; RV32I-NEXT: sub a7, t3, t1 -; RV32I-NEXT: beq a6, a4, .LBB22_4 +; RV32I-NEXT: sub a7, a4, a7 +; RV32I-NEXT: sltu t3, a7, t1 +; RV32I-NEXT: sub a2, a2, t3 +; RV32I-NEXT: sub a7, a7, t1 +; RV32I-NEXT: beq a2, a5, .LBB22_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t1, a4, a6 +; RV32I-NEXT: sltu t1, a5, a2 ; RV32I-NEXT: j .LBB22_5 ; RV32I-NEXT: .LBB22_4: -; RV32I-NEXT: sltu t1, a2, a7 +; RV32I-NEXT: sltu t1, a4, a7 ; RV32I-NEXT: .LBB22_5: -; RV32I-NEXT: sub a5, a1, a5 -; RV32I-NEXT: sub a5, a5, t2 -; RV32I-NEXT: sub a3, t0, a3 -; RV32I-NEXT: beq a5, a1, .LBB22_7 +; RV32I-NEXT: sub a6, a1, a6 +; RV32I-NEXT: sub a6, a6, t2 +; RV32I-NEXT: sub t0, a3, t0 +; RV32I-NEXT: beq a6, a1, .LBB22_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: sltu a1, a1, a5 +; RV32I-NEXT: sltu a1, a1, a6 ; RV32I-NEXT: j .LBB22_8 ; RV32I-NEXT: .LBB22_7: -; RV32I-NEXT: sltu a1, t0, a3 +; RV32I-NEXT: sltu a1, a3, t0 ; RV32I-NEXT: .LBB22_8: -; RV32I-NEXT: xor a4, a6, a4 -; RV32I-NEXT: xor a2, a7, a2 -; RV32I-NEXT: or a2, a2, a4 -; RV32I-NEXT: beqz a2, .LBB22_10 +; RV32I-NEXT: xor a5, a2, a5 +; RV32I-NEXT: xor a3, a7, a4 +; RV32I-NEXT: or a3, a3, a5 +; RV32I-NEXT: beqz a3, .LBB22_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: mv a1, t1 ; RV32I-NEXT: .LBB22_10: -; RV32I-NEXT: neg t0, a1 -; RV32I-NEXT: xor a2, a7, t0 -; RV32I-NEXT: xor a6, a6, t0 -; RV32I-NEXT: xor a4, a3, t0 -; RV32I-NEXT: sltu a3, a2, t0 -; RV32I-NEXT: add a7, a6, a1 -; RV32I-NEXT: sltu a6, a4, t0 -; RV32I-NEXT: sub a3, a7, a3 -; RV32I-NEXT: xor t1, a5, t0 -; RV32I-NEXT: mv a7, a6 -; RV32I-NEXT: beqz a5, .LBB22_12 +; RV32I-NEXT: neg t1, a1 +; RV32I-NEXT: xor a3, a7, t1 +; RV32I-NEXT: xor a4, a2, t1 +; RV32I-NEXT: xor a2, t0, t1 +; RV32I-NEXT: sltu a7, a3, t1 +; RV32I-NEXT: add a4, a4, a1 +; RV32I-NEXT: sltu a5, a2, t1 +; RV32I-NEXT: sub a4, a4, a7 +; RV32I-NEXT: xor a7, a6, t1 +; RV32I-NEXT: mv t0, a5 +; RV32I-NEXT: beqz a6, 
.LBB22_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sltu a7, t1, t0 +; RV32I-NEXT: sltu t0, a7, t1 ; RV32I-NEXT: .LBB22_12: -; RV32I-NEXT: add a2, a2, a1 -; RV32I-NEXT: add t1, t1, a1 -; RV32I-NEXT: add a1, a4, a1 -; RV32I-NEXT: sltu a4, a2, a7 -; RV32I-NEXT: sub a2, a2, a7 -; RV32I-NEXT: sub a5, t1, a6 -; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: add a3, a3, a1 +; RV32I-NEXT: add a7, a7, a1 +; RV32I-NEXT: sltu a6, a3, t0 +; RV32I-NEXT: sub a3, a3, t0 +; RV32I-NEXT: sub a5, a7, a5 +; RV32I-NEXT: sub a4, a4, a6 +; RV32I-NEXT: add a1, a2, a1 ; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a5, 4(a0) -; RV32I-NEXT: sw a2, 8(a0) -; RV32I-NEXT: sw a3, 12(a0) +; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: sw a4, 12(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_cmp_i128: @@ -1620,76 +1620,76 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_cmp_i128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a5, 4(a2) -; RV32ZBB-NEXT: lw a6, 8(a2) -; RV32ZBB-NEXT: lw a7, 12(a2) -; RV32ZBB-NEXT: lw a2, 8(a1) -; RV32ZBB-NEXT: lw a4, 12(a1) -; RV32ZBB-NEXT: lw t0, 0(a1) +; RV32ZBB-NEXT: lw a3, 0(a1) +; RV32ZBB-NEXT: lw a4, 8(a1) +; RV32ZBB-NEXT: lw a5, 12(a1) +; RV32ZBB-NEXT: lw a7, 8(a2) +; RV32ZBB-NEXT: lw t0, 0(a2) +; RV32ZBB-NEXT: lw t1, 12(a2) +; RV32ZBB-NEXT: lw a6, 4(a2) ; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu t1, a2, a6 -; RV32ZBB-NEXT: sub a7, a4, a7 -; RV32ZBB-NEXT: sltu t2, t0, a3 -; RV32ZBB-NEXT: sub a7, a7, t1 +; RV32ZBB-NEXT: sltu a2, a4, a7 +; RV32ZBB-NEXT: sub t1, a5, t1 +; RV32ZBB-NEXT: sltu t2, a3, t0 +; RV32ZBB-NEXT: sub a2, t1, a2 ; RV32ZBB-NEXT: mv t1, t2 -; RV32ZBB-NEXT: beq a1, a5, .LBB22_2 +; RV32ZBB-NEXT: beq a1, a6, .LBB22_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a1, a5 +; RV32ZBB-NEXT: sltu t1, a1, a6 ; RV32ZBB-NEXT: .LBB22_2: -; RV32ZBB-NEXT: sub t3, a2, a6 -; RV32ZBB-NEXT: sltu a6, t3, t1 -; RV32ZBB-NEXT: sub a6, a7, a6 -; RV32ZBB-NEXT: sub a7, t3, t1 -; RV32ZBB-NEXT: beq a6, a4, .LBB22_4 +; 
RV32ZBB-NEXT: sub a7, a4, a7 +; RV32ZBB-NEXT: sltu t3, a7, t1 +; RV32ZBB-NEXT: sub a2, a2, t3 +; RV32ZBB-NEXT: sub a7, a7, t1 +; RV32ZBB-NEXT: beq a2, a5, .LBB22_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t1, a4, a6 +; RV32ZBB-NEXT: sltu t1, a5, a2 ; RV32ZBB-NEXT: j .LBB22_5 ; RV32ZBB-NEXT: .LBB22_4: -; RV32ZBB-NEXT: sltu t1, a2, a7 +; RV32ZBB-NEXT: sltu t1, a4, a7 ; RV32ZBB-NEXT: .LBB22_5: -; RV32ZBB-NEXT: sub a5, a1, a5 -; RV32ZBB-NEXT: sub a5, a5, t2 -; RV32ZBB-NEXT: sub a3, t0, a3 -; RV32ZBB-NEXT: beq a5, a1, .LBB22_7 +; RV32ZBB-NEXT: sub a6, a1, a6 +; RV32ZBB-NEXT: sub a6, a6, t2 +; RV32ZBB-NEXT: sub t0, a3, t0 +; RV32ZBB-NEXT: beq a6, a1, .LBB22_7 ; RV32ZBB-NEXT: # %bb.6: -; RV32ZBB-NEXT: sltu a1, a1, a5 +; RV32ZBB-NEXT: sltu a1, a1, a6 ; RV32ZBB-NEXT: j .LBB22_8 ; RV32ZBB-NEXT: .LBB22_7: -; RV32ZBB-NEXT: sltu a1, t0, a3 +; RV32ZBB-NEXT: sltu a1, a3, t0 ; RV32ZBB-NEXT: .LBB22_8: -; RV32ZBB-NEXT: xor a4, a6, a4 -; RV32ZBB-NEXT: xor a2, a7, a2 -; RV32ZBB-NEXT: or a2, a2, a4 -; RV32ZBB-NEXT: beqz a2, .LBB22_10 +; RV32ZBB-NEXT: xor a5, a2, a5 +; RV32ZBB-NEXT: xor a3, a7, a4 +; RV32ZBB-NEXT: or a3, a3, a5 +; RV32ZBB-NEXT: beqz a3, .LBB22_10 ; RV32ZBB-NEXT: # %bb.9: ; RV32ZBB-NEXT: mv a1, t1 ; RV32ZBB-NEXT: .LBB22_10: -; RV32ZBB-NEXT: neg t0, a1 -; RV32ZBB-NEXT: xor a2, a7, t0 -; RV32ZBB-NEXT: xor a6, a6, t0 -; RV32ZBB-NEXT: xor a4, a3, t0 -; RV32ZBB-NEXT: sltu a3, a2, t0 -; RV32ZBB-NEXT: add a7, a6, a1 -; RV32ZBB-NEXT: sltu a6, a4, t0 -; RV32ZBB-NEXT: sub a3, a7, a3 -; RV32ZBB-NEXT: xor t1, a5, t0 -; RV32ZBB-NEXT: mv a7, a6 -; RV32ZBB-NEXT: beqz a5, .LBB22_12 +; RV32ZBB-NEXT: neg t1, a1 +; RV32ZBB-NEXT: xor a3, a7, t1 +; RV32ZBB-NEXT: xor a4, a2, t1 +; RV32ZBB-NEXT: xor a2, t0, t1 +; RV32ZBB-NEXT: sltu a7, a3, t1 +; RV32ZBB-NEXT: add a4, a4, a1 +; RV32ZBB-NEXT: sltu a5, a2, t1 +; RV32ZBB-NEXT: sub a4, a4, a7 +; RV32ZBB-NEXT: xor a7, a6, t1 +; RV32ZBB-NEXT: mv t0, a5 +; RV32ZBB-NEXT: beqz a6, .LBB22_12 ; RV32ZBB-NEXT: # %bb.11: -; RV32ZBB-NEXT: sltu a7, t1, 
t0 +; RV32ZBB-NEXT: sltu t0, a7, t1 ; RV32ZBB-NEXT: .LBB22_12: -; RV32ZBB-NEXT: add a2, a2, a1 -; RV32ZBB-NEXT: add t1, t1, a1 -; RV32ZBB-NEXT: add a1, a4, a1 -; RV32ZBB-NEXT: sltu a4, a2, a7 -; RV32ZBB-NEXT: sub a2, a2, a7 -; RV32ZBB-NEXT: sub a5, t1, a6 -; RV32ZBB-NEXT: sub a3, a3, a4 +; RV32ZBB-NEXT: add a3, a3, a1 +; RV32ZBB-NEXT: add a7, a7, a1 +; RV32ZBB-NEXT: sltu a6, a3, t0 +; RV32ZBB-NEXT: sub a3, a3, t0 +; RV32ZBB-NEXT: sub a5, a7, a5 +; RV32ZBB-NEXT: sub a4, a4, a6 +; RV32ZBB-NEXT: add a1, a2, a1 ; RV32ZBB-NEXT: sw a1, 0(a0) ; RV32ZBB-NEXT: sw a5, 4(a0) -; RV32ZBB-NEXT: sw a2, 8(a0) -; RV32ZBB-NEXT: sw a3, 12(a0) +; RV32ZBB-NEXT: sw a3, 8(a0) +; RV32ZBB-NEXT: sw a4, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_cmp_i128: @@ -1918,76 +1918,76 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind { define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_select_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a5, 4(a2) -; RV32I-NEXT: lw a6, 8(a2) -; RV32I-NEXT: lw a7, 12(a2) -; RV32I-NEXT: lw a2, 8(a1) -; RV32I-NEXT: lw a4, 12(a1) -; RV32I-NEXT: lw t0, 0(a1) +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a4, 8(a1) +; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a7, 8(a2) +; RV32I-NEXT: lw t0, 0(a2) +; RV32I-NEXT: lw t1, 12(a2) +; RV32I-NEXT: lw a6, 4(a2) ; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu t1, a2, a6 -; RV32I-NEXT: sub a7, a4, a7 -; RV32I-NEXT: sltu t2, t0, a3 -; RV32I-NEXT: sub a7, a7, t1 +; RV32I-NEXT: sltu a2, a4, a7 +; RV32I-NEXT: sub t1, a5, t1 +; RV32I-NEXT: sltu t2, a3, t0 +; RV32I-NEXT: sub a2, t1, a2 ; RV32I-NEXT: mv t1, t2 -; RV32I-NEXT: beq a1, a5, .LBB27_2 +; RV32I-NEXT: beq a1, a6, .LBB27_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a1, a5 +; RV32I-NEXT: sltu t1, a1, a6 ; RV32I-NEXT: .LBB27_2: -; RV32I-NEXT: sub t3, a2, a6 -; RV32I-NEXT: sltu a6, t3, t1 -; RV32I-NEXT: sub a6, a7, a6 -; RV32I-NEXT: sub a7, t3, t1 -; RV32I-NEXT: beq a6, a4, .LBB27_4 +; RV32I-NEXT: sub a7, a4, a7 +; 
RV32I-NEXT: sltu t3, a7, t1 +; RV32I-NEXT: sub a2, a2, t3 +; RV32I-NEXT: sub a7, a7, t1 +; RV32I-NEXT: beq a2, a5, .LBB27_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t1, a4, a6 +; RV32I-NEXT: sltu t1, a5, a2 ; RV32I-NEXT: j .LBB27_5 ; RV32I-NEXT: .LBB27_4: -; RV32I-NEXT: sltu t1, a2, a7 +; RV32I-NEXT: sltu t1, a4, a7 ; RV32I-NEXT: .LBB27_5: -; RV32I-NEXT: sub a5, a1, a5 -; RV32I-NEXT: sub a5, a5, t2 -; RV32I-NEXT: sub a3, t0, a3 -; RV32I-NEXT: beq a5, a1, .LBB27_7 +; RV32I-NEXT: sub a6, a1, a6 +; RV32I-NEXT: sub a6, a6, t2 +; RV32I-NEXT: sub t0, a3, t0 +; RV32I-NEXT: beq a6, a1, .LBB27_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: sltu a1, a1, a5 +; RV32I-NEXT: sltu a1, a1, a6 ; RV32I-NEXT: j .LBB27_8 ; RV32I-NEXT: .LBB27_7: -; RV32I-NEXT: sltu a1, t0, a3 +; RV32I-NEXT: sltu a1, a3, t0 ; RV32I-NEXT: .LBB27_8: -; RV32I-NEXT: xor a4, a6, a4 -; RV32I-NEXT: xor a2, a7, a2 -; RV32I-NEXT: or a2, a2, a4 -; RV32I-NEXT: beqz a2, .LBB27_10 +; RV32I-NEXT: xor a5, a2, a5 +; RV32I-NEXT: xor a3, a7, a4 +; RV32I-NEXT: or a3, a3, a5 +; RV32I-NEXT: beqz a3, .LBB27_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: mv a1, t1 ; RV32I-NEXT: .LBB27_10: -; RV32I-NEXT: neg t0, a1 -; RV32I-NEXT: xor a2, a7, t0 -; RV32I-NEXT: xor a6, a6, t0 -; RV32I-NEXT: xor a4, a3, t0 -; RV32I-NEXT: sltu a3, a2, t0 -; RV32I-NEXT: add a7, a6, a1 -; RV32I-NEXT: sltu a6, a4, t0 -; RV32I-NEXT: sub a3, a7, a3 -; RV32I-NEXT: xor t1, a5, t0 -; RV32I-NEXT: mv a7, a6 -; RV32I-NEXT: beqz a5, .LBB27_12 +; RV32I-NEXT: neg t1, a1 +; RV32I-NEXT: xor a3, a7, t1 +; RV32I-NEXT: xor a4, a2, t1 +; RV32I-NEXT: xor a2, t0, t1 +; RV32I-NEXT: sltu a7, a3, t1 +; RV32I-NEXT: add a4, a4, a1 +; RV32I-NEXT: sltu a5, a2, t1 +; RV32I-NEXT: sub a4, a4, a7 +; RV32I-NEXT: xor a7, a6, t1 +; RV32I-NEXT: mv t0, a5 +; RV32I-NEXT: beqz a6, .LBB27_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sltu a7, t1, t0 +; RV32I-NEXT: sltu t0, a7, t1 ; RV32I-NEXT: .LBB27_12: -; RV32I-NEXT: add a2, a2, a1 -; RV32I-NEXT: add t1, t1, a1 -; RV32I-NEXT: add a1, a4, a1 -; 
RV32I-NEXT: sltu a4, a2, a7 -; RV32I-NEXT: sub a2, a2, a7 -; RV32I-NEXT: sub a5, t1, a6 -; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: add a3, a3, a1 +; RV32I-NEXT: add a7, a7, a1 +; RV32I-NEXT: sltu a6, a3, t0 +; RV32I-NEXT: sub a3, a3, t0 +; RV32I-NEXT: sub a5, a7, a5 +; RV32I-NEXT: sub a4, a4, a6 +; RV32I-NEXT: add a1, a2, a1 ; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a5, 4(a0) -; RV32I-NEXT: sw a2, 8(a0) -; RV32I-NEXT: sw a3, 12(a0) +; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: sw a4, 12(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_select_i128: @@ -2014,76 +2014,76 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_select_i128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a5, 4(a2) -; RV32ZBB-NEXT: lw a6, 8(a2) -; RV32ZBB-NEXT: lw a7, 12(a2) -; RV32ZBB-NEXT: lw a2, 8(a1) -; RV32ZBB-NEXT: lw a4, 12(a1) -; RV32ZBB-NEXT: lw t0, 0(a1) +; RV32ZBB-NEXT: lw a3, 0(a1) +; RV32ZBB-NEXT: lw a4, 8(a1) +; RV32ZBB-NEXT: lw a5, 12(a1) +; RV32ZBB-NEXT: lw a7, 8(a2) +; RV32ZBB-NEXT: lw t0, 0(a2) +; RV32ZBB-NEXT: lw t1, 12(a2) +; RV32ZBB-NEXT: lw a6, 4(a2) ; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu t1, a2, a6 -; RV32ZBB-NEXT: sub a7, a4, a7 -; RV32ZBB-NEXT: sltu t2, t0, a3 -; RV32ZBB-NEXT: sub a7, a7, t1 +; RV32ZBB-NEXT: sltu a2, a4, a7 +; RV32ZBB-NEXT: sub t1, a5, t1 +; RV32ZBB-NEXT: sltu t2, a3, t0 +; RV32ZBB-NEXT: sub a2, t1, a2 ; RV32ZBB-NEXT: mv t1, t2 -; RV32ZBB-NEXT: beq a1, a5, .LBB27_2 +; RV32ZBB-NEXT: beq a1, a6, .LBB27_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a1, a5 +; RV32ZBB-NEXT: sltu t1, a1, a6 ; RV32ZBB-NEXT: .LBB27_2: -; RV32ZBB-NEXT: sub t3, a2, a6 -; RV32ZBB-NEXT: sltu a6, t3, t1 -; RV32ZBB-NEXT: sub a6, a7, a6 -; RV32ZBB-NEXT: sub a7, t3, t1 -; RV32ZBB-NEXT: beq a6, a4, .LBB27_4 +; RV32ZBB-NEXT: sub a7, a4, a7 +; RV32ZBB-NEXT: sltu t3, a7, t1 +; RV32ZBB-NEXT: sub a2, a2, t3 +; RV32ZBB-NEXT: sub a7, a7, t1 +; RV32ZBB-NEXT: beq a2, a5, .LBB27_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu 
t1, a4, a6 +; RV32ZBB-NEXT: sltu t1, a5, a2 ; RV32ZBB-NEXT: j .LBB27_5 ; RV32ZBB-NEXT: .LBB27_4: -; RV32ZBB-NEXT: sltu t1, a2, a7 +; RV32ZBB-NEXT: sltu t1, a4, a7 ; RV32ZBB-NEXT: .LBB27_5: -; RV32ZBB-NEXT: sub a5, a1, a5 -; RV32ZBB-NEXT: sub a5, a5, t2 -; RV32ZBB-NEXT: sub a3, t0, a3 -; RV32ZBB-NEXT: beq a5, a1, .LBB27_7 +; RV32ZBB-NEXT: sub a6, a1, a6 +; RV32ZBB-NEXT: sub a6, a6, t2 +; RV32ZBB-NEXT: sub t0, a3, t0 +; RV32ZBB-NEXT: beq a6, a1, .LBB27_7 ; RV32ZBB-NEXT: # %bb.6: -; RV32ZBB-NEXT: sltu a1, a1, a5 +; RV32ZBB-NEXT: sltu a1, a1, a6 ; RV32ZBB-NEXT: j .LBB27_8 ; RV32ZBB-NEXT: .LBB27_7: -; RV32ZBB-NEXT: sltu a1, t0, a3 +; RV32ZBB-NEXT: sltu a1, a3, t0 ; RV32ZBB-NEXT: .LBB27_8: -; RV32ZBB-NEXT: xor a4, a6, a4 -; RV32ZBB-NEXT: xor a2, a7, a2 -; RV32ZBB-NEXT: or a2, a2, a4 -; RV32ZBB-NEXT: beqz a2, .LBB27_10 +; RV32ZBB-NEXT: xor a5, a2, a5 +; RV32ZBB-NEXT: xor a3, a7, a4 +; RV32ZBB-NEXT: or a3, a3, a5 +; RV32ZBB-NEXT: beqz a3, .LBB27_10 ; RV32ZBB-NEXT: # %bb.9: ; RV32ZBB-NEXT: mv a1, t1 ; RV32ZBB-NEXT: .LBB27_10: -; RV32ZBB-NEXT: neg t0, a1 -; RV32ZBB-NEXT: xor a2, a7, t0 -; RV32ZBB-NEXT: xor a6, a6, t0 -; RV32ZBB-NEXT: xor a4, a3, t0 -; RV32ZBB-NEXT: sltu a3, a2, t0 -; RV32ZBB-NEXT: add a7, a6, a1 -; RV32ZBB-NEXT: sltu a6, a4, t0 -; RV32ZBB-NEXT: sub a3, a7, a3 -; RV32ZBB-NEXT: xor t1, a5, t0 -; RV32ZBB-NEXT: mv a7, a6 -; RV32ZBB-NEXT: beqz a5, .LBB27_12 +; RV32ZBB-NEXT: neg t1, a1 +; RV32ZBB-NEXT: xor a3, a7, t1 +; RV32ZBB-NEXT: xor a4, a2, t1 +; RV32ZBB-NEXT: xor a2, t0, t1 +; RV32ZBB-NEXT: sltu a7, a3, t1 +; RV32ZBB-NEXT: add a4, a4, a1 +; RV32ZBB-NEXT: sltu a5, a2, t1 +; RV32ZBB-NEXT: sub a4, a4, a7 +; RV32ZBB-NEXT: xor a7, a6, t1 +; RV32ZBB-NEXT: mv t0, a5 +; RV32ZBB-NEXT: beqz a6, .LBB27_12 ; RV32ZBB-NEXT: # %bb.11: -; RV32ZBB-NEXT: sltu a7, t1, t0 +; RV32ZBB-NEXT: sltu t0, a7, t1 ; RV32ZBB-NEXT: .LBB27_12: -; RV32ZBB-NEXT: add a2, a2, a1 -; RV32ZBB-NEXT: add t1, t1, a1 -; RV32ZBB-NEXT: add a1, a4, a1 -; RV32ZBB-NEXT: sltu a4, a2, a7 -; RV32ZBB-NEXT: sub 
a2, a2, a7 -; RV32ZBB-NEXT: sub a5, t1, a6 -; RV32ZBB-NEXT: sub a3, a3, a4 +; RV32ZBB-NEXT: add a3, a3, a1 +; RV32ZBB-NEXT: add a7, a7, a1 +; RV32ZBB-NEXT: sltu a6, a3, t0 +; RV32ZBB-NEXT: sub a3, a3, t0 +; RV32ZBB-NEXT: sub a5, a7, a5 +; RV32ZBB-NEXT: sub a4, a4, a6 +; RV32ZBB-NEXT: add a1, a2, a1 ; RV32ZBB-NEXT: sw a1, 0(a0) ; RV32ZBB-NEXT: sw a5, 4(a0) -; RV32ZBB-NEXT: sw a2, 8(a0) -; RV32ZBB-NEXT: sw a3, 12(a0) +; RV32ZBB-NEXT: sw a3, 8(a0) +; RV32ZBB-NEXT: sw a4, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_select_i128: diff --git a/llvm/test/CodeGen/RISCV/add-before-shl.ll b/llvm/test/CodeGen/RISCV/add-before-shl.ll index 35a39b89a2cb7..70e43f9b31100 100644 --- a/llvm/test/CodeGen/RISCV/add-before-shl.ll +++ b/llvm/test/CodeGen/RISCV/add-before-shl.ll @@ -60,8 +60,8 @@ define signext i32 @add_large_const(i32 signext %a) nounwind { ; ; RV64I-LABEL: add_large_const: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: lui a1, 4095 +; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: slli a1, a1, 36 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srai a0, a0, 48 @@ -100,8 +100,8 @@ define signext i32 @add_huge_const(i32 signext %a) nounwind { ; ; RV64I-LABEL: add_huge_const: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: lui a1, 32767 +; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: slli a1, a1, 36 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srai a0, a0, 48 @@ -174,13 +174,13 @@ define i128 @add_wide_operand(i128 %a) nounwind { ; RV32I-NEXT: srli a3, a3, 29 ; RV32I-NEXT: or a5, a6, a5 ; RV32I-NEXT: slli a6, a4, 3 -; RV32I-NEXT: or a3, a6, a3 -; RV32I-NEXT: lui a6, 128 ; RV32I-NEXT: srli a4, a4, 29 ; RV32I-NEXT: slli a1, a1, 3 -; RV32I-NEXT: slli a2, a2, 3 +; RV32I-NEXT: or a3, a6, a3 ; RV32I-NEXT: or a1, a1, a4 -; RV32I-NEXT: add a1, a1, a6 +; RV32I-NEXT: lui a4, 128 +; RV32I-NEXT: slli a2, a2, 3 +; RV32I-NEXT: add a1, a1, a4 ; RV32I-NEXT: sw a2, 0(a0) ; RV32I-NEXT: sw a5, 4(a0) ; RV32I-NEXT: sw a3, 8(a0) @@ -191,46 +191,46 @@ define i128 
@add_wide_operand(i128 %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: srli a2, a0, 61 ; RV64I-NEXT: slli a1, a1, 3 -; RV64I-NEXT: slli a0, a0, 3 +; RV64I-NEXT: addi a3, zero, 1 ; RV64I-NEXT: or a1, a1, a2 -; RV64I-NEXT: addi a2, zero, 1 -; RV64I-NEXT: slli a2, a2, 51 -; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: slli a3, a3, 51 +; RV64I-NEXT: slli a0, a0, 3 +; RV64I-NEXT: add a1, a1, a3 ; RV64I-NEXT: jalr zero, 0(ra) ; ; RV32C-LABEL: add_wide_operand: ; RV32C: # %bb.0: -; RV32C-NEXT: c.lw a2, 0(a1) ; RV32C-NEXT: c.lw a4, 12(a1) -; RV32C-NEXT: c.lw a3, 4(a1) +; RV32C-NEXT: c.lw a3, 0(a1) +; RV32C-NEXT: c.lw a2, 4(a1) ; RV32C-NEXT: c.lw a1, 8(a1) ; RV32C-NEXT: c.lui a5, 16 ; RV32C-NEXT: add a6, a4, a5 -; RV32C-NEXT: srli a5, a2, 29 -; RV32C-NEXT: slli a4, a3, 3 +; RV32C-NEXT: srli a5, a3, 29 +; RV32C-NEXT: slli a4, a2, 3 ; RV32C-NEXT: c.or a4, a5 ; RV32C-NEXT: srli a5, a1, 29 -; RV32C-NEXT: c.srli a3, 29 -; RV32C-NEXT: c.slli a1, 3 -; RV32C-NEXT: c.slli a2, 3 +; RV32C-NEXT: c.srli a2, 29 ; RV32C-NEXT: c.slli a6, 3 -; RV32C-NEXT: c.or a1, a3 -; RV32C-NEXT: or a3, a6, a5 -; RV32C-NEXT: c.sw a2, 0(a0) +; RV32C-NEXT: c.slli a1, 3 +; RV32C-NEXT: or a5, a6, a5 +; RV32C-NEXT: c.or a1, a2 +; RV32C-NEXT: c.slli a3, 3 +; RV32C-NEXT: c.sw a3, 0(a0) ; RV32C-NEXT: c.sw a4, 4(a0) ; RV32C-NEXT: c.sw a1, 8(a0) -; RV32C-NEXT: c.sw a3, 12(a0) +; RV32C-NEXT: c.sw a5, 12(a0) ; RV32C-NEXT: c.jr ra ; ; RV64C-LABEL: add_wide_operand: ; RV64C: # %bb.0: ; RV64C-NEXT: srli a2, a0, 61 ; RV64C-NEXT: c.slli a1, 3 -; RV64C-NEXT: c.slli a0, 3 +; RV64C-NEXT: c.li a3, 1 ; RV64C-NEXT: c.or a1, a2 -; RV64C-NEXT: c.li a2, 1 -; RV64C-NEXT: c.slli a2, 51 -; RV64C-NEXT: c.add a1, a2 +; RV64C-NEXT: c.slli a3, 51 +; RV64C-NEXT: c.slli a0, 3 +; RV64C-NEXT: c.add a1, a3 ; RV64C-NEXT: c.jr ra %1 = add i128 %a, 5192296858534827628530496329220096 %2 = shl i128 %1, 3 diff --git a/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll b/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll index 9cb90cc16a519..a658aad111f08 
100644 --- a/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll +++ b/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll @@ -6,33 +6,33 @@ define void @add_sext_shl_moreOneUse_add(ptr %array1, i32 %a, i32 %b) { ; NO-ZBA-LABEL: add_sext_shl_moreOneUse_add: ; NO-ZBA: # %bb.0: # %entry -; NO-ZBA-NEXT: addi a3, a1, 5 -; NO-ZBA-NEXT: sext.w a1, a1 -; NO-ZBA-NEXT: slli a1, a1, 2 -; NO-ZBA-NEXT: add a0, a1, a0 +; NO-ZBA-NEXT: sext.w a3, a1 +; NO-ZBA-NEXT: slli a3, a3, 2 +; NO-ZBA-NEXT: add a0, a3, a0 +; NO-ZBA-NEXT: addi a1, a1, 5 ; NO-ZBA-NEXT: sw a2, 20(a0) ; NO-ZBA-NEXT: sw a2, 24(a0) -; NO-ZBA-NEXT: sw a3, 140(a0) +; NO-ZBA-NEXT: sw a1, 140(a0) ; NO-ZBA-NEXT: ret ; ; ZBA-LABEL: add_sext_shl_moreOneUse_add: ; ZBA: # %bb.0: # %entry -; ZBA-NEXT: addi a3, a1, 5 -; ZBA-NEXT: sext.w a1, a1 -; ZBA-NEXT: sh2add a0, a1, a0 +; ZBA-NEXT: sext.w a3, a1 +; ZBA-NEXT: sh2add a0, a3, a0 +; ZBA-NEXT: addi a1, a1, 5 ; ZBA-NEXT: sw a2, 20(a0) ; ZBA-NEXT: sw a2, 24(a0) -; ZBA-NEXT: sw a3, 140(a0) +; ZBA-NEXT: sw a1, 140(a0) ; ZBA-NEXT: ret ; ; XANDESPERF-LABEL: add_sext_shl_moreOneUse_add: ; XANDESPERF: # %bb.0: # %entry -; XANDESPERF-NEXT: addi a3, a1, 5 -; XANDESPERF-NEXT: sext.w a1, a1 -; XANDESPERF-NEXT: nds.lea.w a0, a0, a1 +; XANDESPERF-NEXT: sext.w a3, a1 +; XANDESPERF-NEXT: nds.lea.w a0, a0, a3 +; XANDESPERF-NEXT: addi a1, a1, 5 ; XANDESPERF-NEXT: sw a2, 20(a0) ; XANDESPERF-NEXT: sw a2, 24(a0) -; XANDESPERF-NEXT: sw a3, 140(a0) +; XANDESPERF-NEXT: sw a1, 140(a0) ; XANDESPERF-NEXT: ret entry: %add = add nsw i32 %a, 5 @@ -53,41 +53,41 @@ entry: define void @add_sext_shl_moreOneUse_addexceedsign12(ptr %array1, i32 %a, i32 %b) { ; NO-ZBA-LABEL: add_sext_shl_moreOneUse_addexceedsign12: ; NO-ZBA: # %bb.0: # %entry -; NO-ZBA-NEXT: addi a3, a1, 2047 -; NO-ZBA-NEXT: lui a4, 2 -; NO-ZBA-NEXT: sext.w a1, a1 -; NO-ZBA-NEXT: addi a3, a3, 1 -; NO-ZBA-NEXT: slli a1, a1, 2 +; NO-ZBA-NEXT: lui a3, 2 +; NO-ZBA-NEXT: sext.w a4, a1 +; NO-ZBA-NEXT: slli a4, a4, 2 +; NO-ZBA-NEXT: add a0, a0, a3 +; 
NO-ZBA-NEXT: addi a1, a1, 2047 ; NO-ZBA-NEXT: add a0, a0, a4 -; NO-ZBA-NEXT: add a0, a0, a1 +; NO-ZBA-NEXT: addi a1, a1, 1 ; NO-ZBA-NEXT: sw a2, 0(a0) -; NO-ZBA-NEXT: sw a3, 4(a0) +; NO-ZBA-NEXT: sw a1, 4(a0) ; NO-ZBA-NEXT: sw a2, 120(a0) ; NO-ZBA-NEXT: ret ; ; ZBA-LABEL: add_sext_shl_moreOneUse_addexceedsign12: ; ZBA: # %bb.0: # %entry -; ZBA-NEXT: addi a3, a1, 2047 -; ZBA-NEXT: lui a4, 2 -; ZBA-NEXT: sext.w a1, a1 -; ZBA-NEXT: addi a3, a3, 1 -; ZBA-NEXT: sh2add a0, a1, a0 -; ZBA-NEXT: add a0, a0, a4 +; ZBA-NEXT: sext.w a3, a1 +; ZBA-NEXT: sh2add a0, a3, a0 +; ZBA-NEXT: lui a3, 2 +; ZBA-NEXT: addi a1, a1, 2047 +; ZBA-NEXT: add a0, a0, a3 +; ZBA-NEXT: addi a1, a1, 1 ; ZBA-NEXT: sw a2, 0(a0) -; ZBA-NEXT: sw a3, 4(a0) +; ZBA-NEXT: sw a1, 4(a0) ; ZBA-NEXT: sw a2, 120(a0) ; ZBA-NEXT: ret ; ; XANDESPERF-LABEL: add_sext_shl_moreOneUse_addexceedsign12: ; XANDESPERF: # %bb.0: # %entry -; XANDESPERF-NEXT: addi a3, a1, 2047 +; XANDESPERF-NEXT: sext.w a3, a1 ; XANDESPERF-NEXT: lui a4, 2 -; XANDESPERF-NEXT: sext.w a1, a1 -; XANDESPERF-NEXT: addi a3, a3, 1 -; XANDESPERF-NEXT: nds.lea.w a0, a0, a1 +; XANDESPERF-NEXT: nds.lea.w a0, a0, a3 +; XANDESPERF-NEXT: addi a1, a1, 2047 ; XANDESPERF-NEXT: add a0, a0, a4 +; XANDESPERF-NEXT: addi a1, a1, 1 ; XANDESPERF-NEXT: sw a2, 0(a0) -; XANDESPERF-NEXT: sw a3, 4(a0) +; XANDESPERF-NEXT: sw a1, 4(a0) ; XANDESPERF-NEXT: sw a2, 120(a0) ; XANDESPERF-NEXT: ret entry: @@ -108,32 +108,32 @@ define void @add_sext_shl_moreOneUse_sext(ptr %array1, i32 %a, i32 %b) { ; NO-ZBA-LABEL: add_sext_shl_moreOneUse_sext: ; NO-ZBA: # %bb.0: # %entry ; NO-ZBA-NEXT: sext.w a1, a1 -; NO-ZBA-NEXT: addi a3, a1, 5 -; NO-ZBA-NEXT: slli a1, a1, 2 -; NO-ZBA-NEXT: add a0, a1, a0 +; NO-ZBA-NEXT: slli a3, a1, 2 +; NO-ZBA-NEXT: add a0, a3, a0 +; NO-ZBA-NEXT: addi a1, a1, 5 ; NO-ZBA-NEXT: sw a2, 20(a0) ; NO-ZBA-NEXT: sw a2, 24(a0) -; NO-ZBA-NEXT: sd a3, 140(a0) +; NO-ZBA-NEXT: sd a1, 140(a0) ; NO-ZBA-NEXT: ret ; ; ZBA-LABEL: add_sext_shl_moreOneUse_sext: ; ZBA: # %bb.0: # 
%entry ; ZBA-NEXT: sext.w a1, a1 -; ZBA-NEXT: addi a3, a1, 5 ; ZBA-NEXT: sh2add a0, a1, a0 +; ZBA-NEXT: addi a1, a1, 5 ; ZBA-NEXT: sw a2, 20(a0) ; ZBA-NEXT: sw a2, 24(a0) -; ZBA-NEXT: sd a3, 140(a0) +; ZBA-NEXT: sd a1, 140(a0) ; ZBA-NEXT: ret ; ; XANDESPERF-LABEL: add_sext_shl_moreOneUse_sext: ; XANDESPERF: # %bb.0: # %entry ; XANDESPERF-NEXT: sext.w a1, a1 -; XANDESPERF-NEXT: addi a3, a1, 5 ; XANDESPERF-NEXT: nds.lea.w a0, a0, a1 +; XANDESPERF-NEXT: addi a1, a1, 5 ; XANDESPERF-NEXT: sw a2, 20(a0) ; XANDESPERF-NEXT: sw a2, 24(a0) -; XANDESPERF-NEXT: sd a3, 140(a0) +; XANDESPERF-NEXT: sd a1, 140(a0) ; XANDESPERF-NEXT: ret entry: %add = add nsw i32 %a, 5 @@ -217,11 +217,11 @@ define void @add_sext_shl_moreOneUse_add_inSelect_addexceedsign12(ptr %array1, i ; NO-ZBA-LABEL: add_sext_shl_moreOneUse_add_inSelect_addexceedsign12: ; NO-ZBA: # %bb.0: # %entry ; NO-ZBA-NEXT: addi a4, a1, 2047 -; NO-ZBA-NEXT: lui a5, 2 -; NO-ZBA-NEXT: slli a6, a1, 2 +; NO-ZBA-NEXT: slli a5, a1, 2 ; NO-ZBA-NEXT: addi a1, a4, 1 -; NO-ZBA-NEXT: add a0, a0, a6 +; NO-ZBA-NEXT: lui a4, 2 ; NO-ZBA-NEXT: add a0, a0, a5 +; NO-ZBA-NEXT: add a0, a0, a4 ; NO-ZBA-NEXT: mv a4, a1 ; NO-ZBA-NEXT: bgtz a3, .LBB4_2 ; NO-ZBA-NEXT: # %bb.1: # %entry @@ -235,25 +235,25 @@ define void @add_sext_shl_moreOneUse_add_inSelect_addexceedsign12(ptr %array1, i ; ZBA-LABEL: add_sext_shl_moreOneUse_add_inSelect_addexceedsign12: ; ZBA: # %bb.0: # %entry ; ZBA-NEXT: addi a4, a1, 2047 -; ZBA-NEXT: lui a5, 2 -; ZBA-NEXT: addi a4, a4, 1 -; ZBA-NEXT: sh2add a0, a1, a0 -; ZBA-NEXT: add a0, a0, a5 -; ZBA-NEXT: mv a1, a4 +; ZBA-NEXT: sh2add a1, a1, a0 +; ZBA-NEXT: addi a0, a4, 1 +; ZBA-NEXT: lui a4, 2 +; ZBA-NEXT: add a1, a1, a4 +; ZBA-NEXT: mv a4, a0 ; ZBA-NEXT: bgtz a3, .LBB4_2 ; ZBA-NEXT: # %bb.1: # %entry -; ZBA-NEXT: mv a1, a2 +; ZBA-NEXT: mv a4, a2 ; ZBA-NEXT: .LBB4_2: # %entry -; ZBA-NEXT: sw a1, 0(a0) -; ZBA-NEXT: sw a1, 4(a0) -; ZBA-NEXT: sw a4, 120(a0) +; ZBA-NEXT: sw a4, 0(a1) +; ZBA-NEXT: sw a4, 4(a1) +; ZBA-NEXT: sw a0, 
120(a1) ; ZBA-NEXT: ret ; ; XANDESPERF-LABEL: add_sext_shl_moreOneUse_add_inSelect_addexceedsign12: ; XANDESPERF: # %bb.0: # %entry ; XANDESPERF-NEXT: addi a4, a1, 2047 -; XANDESPERF-NEXT: lui a5, 2 ; XANDESPERF-NEXT: addi a4, a4, 1 +; XANDESPERF-NEXT: lui a5, 2 ; XANDESPERF-NEXT: nds.lea.w a0, a0, a1 ; XANDESPERF-NEXT: add a0, a0, a5 ; XANDESPERF-NEXT: mv a1, a4 @@ -342,10 +342,10 @@ entry: define i64 @add_shl_moreOneUse_sh1add(i64 %x) { ; NO-ZBA-LABEL: add_shl_moreOneUse_sh1add: ; NO-ZBA: # %bb.0: -; NO-ZBA-NEXT: ori a1, a0, 1 -; NO-ZBA-NEXT: slli a0, a0, 1 -; NO-ZBA-NEXT: ori a0, a0, 2 -; NO-ZBA-NEXT: add a0, a0, a1 +; NO-ZBA-NEXT: slli a1, a0, 1 +; NO-ZBA-NEXT: ori a0, a0, 1 +; NO-ZBA-NEXT: ori a1, a1, 2 +; NO-ZBA-NEXT: add a0, a1, a0 ; NO-ZBA-NEXT: ret ; ; ZBA-LABEL: add_shl_moreOneUse_sh1add: @@ -368,10 +368,10 @@ define i64 @add_shl_moreOneUse_sh1add(i64 %x) { define i64 @add_shl_moreOneUse_sh2add(i64 %x) { ; NO-ZBA-LABEL: add_shl_moreOneUse_sh2add: ; NO-ZBA: # %bb.0: -; NO-ZBA-NEXT: ori a1, a0, 1 -; NO-ZBA-NEXT: slli a0, a0, 2 -; NO-ZBA-NEXT: ori a0, a0, 4 -; NO-ZBA-NEXT: add a0, a0, a1 +; NO-ZBA-NEXT: slli a1, a0, 2 +; NO-ZBA-NEXT: ori a0, a0, 1 +; NO-ZBA-NEXT: ori a1, a1, 4 +; NO-ZBA-NEXT: add a0, a1, a0 ; NO-ZBA-NEXT: ret ; ; ZBA-LABEL: add_shl_moreOneUse_sh2add: @@ -394,10 +394,10 @@ define i64 @add_shl_moreOneUse_sh2add(i64 %x) { define i64 @add_shl_moreOneUse_sh3add(i64 %x) { ; NO-ZBA-LABEL: add_shl_moreOneUse_sh3add: ; NO-ZBA: # %bb.0: -; NO-ZBA-NEXT: ori a1, a0, 1 -; NO-ZBA-NEXT: slli a0, a0, 3 -; NO-ZBA-NEXT: ori a0, a0, 8 -; NO-ZBA-NEXT: add a0, a0, a1 +; NO-ZBA-NEXT: slli a1, a0, 3 +; NO-ZBA-NEXT: ori a0, a0, 1 +; NO-ZBA-NEXT: ori a1, a1, 8 +; NO-ZBA-NEXT: add a0, a1, a0 ; NO-ZBA-NEXT: ret ; ; ZBA-LABEL: add_shl_moreOneUse_sh3add: @@ -433,10 +433,10 @@ define i64 @add_shl_sext(i32 %1) { define i64 @add_shl_moreOneUse_sh4add(i64 %x) { ; RV64-LABEL: add_shl_moreOneUse_sh4add: ; RV64: # %bb.0: -; RV64-NEXT: ori a1, a0, 1 -; RV64-NEXT: slli a0, a0, 4 
-; RV64-NEXT: ori a0, a0, 16 -; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: ori a0, a0, 1 +; RV64-NEXT: ori a1, a1, 16 +; RV64-NEXT: add a0, a1, a0 ; RV64-NEXT: ret %or = or i64 %x, 1 %mul = shl i64 %or, 4 diff --git a/llvm/test/CodeGen/RISCV/add_shl_constant.ll b/llvm/test/CodeGen/RISCV/add_shl_constant.ll index bb0456f7fe1a6..2745683be697c 100644 --- a/llvm/test/CodeGen/RISCV/add_shl_constant.ll +++ b/llvm/test/CodeGen/RISCV/add_shl_constant.ll @@ -31,30 +31,30 @@ define i32 @add_shl_oneUse(i32 %x, i32 %y) nounwind { define void @add_shl_moreOneUse_inStore(ptr %array1, i32 %a, i32 %b) { ; NO-ZBA-LABEL: add_shl_moreOneUse_inStore: ; NO-ZBA: # %bb.0: # %entry -; NO-ZBA-NEXT: addi a3, a1, 5 -; NO-ZBA-NEXT: slli a1, a1, 2 -; NO-ZBA-NEXT: add a0, a0, a1 +; NO-ZBA-NEXT: slli a3, a1, 2 +; NO-ZBA-NEXT: add a0, a0, a3 +; NO-ZBA-NEXT: addi a1, a1, 5 ; NO-ZBA-NEXT: sw a2, 20(a0) ; NO-ZBA-NEXT: sw a2, 24(a0) -; NO-ZBA-NEXT: sw a3, 140(a0) +; NO-ZBA-NEXT: sw a1, 140(a0) ; NO-ZBA-NEXT: ret ; ; ZBA-LABEL: add_shl_moreOneUse_inStore: ; ZBA: # %bb.0: # %entry -; ZBA-NEXT: addi a3, a1, 5 ; ZBA-NEXT: sh2add a0, a1, a0 +; ZBA-NEXT: addi a1, a1, 5 ; ZBA-NEXT: sw a2, 20(a0) ; ZBA-NEXT: sw a2, 24(a0) -; ZBA-NEXT: sw a3, 140(a0) +; ZBA-NEXT: sw a1, 140(a0) ; ZBA-NEXT: ret ; ; XANDESPERF-LABEL: add_shl_moreOneUse_inStore: ; XANDESPERF: # %bb.0: # %entry -; XANDESPERF-NEXT: addi a3, a1, 5 ; XANDESPERF-NEXT: nds.lea.w a0, a0, a1 +; XANDESPERF-NEXT: addi a1, a1, 5 ; XANDESPERF-NEXT: sw a2, 20(a0) ; XANDESPERF-NEXT: sw a2, 24(a0) -; XANDESPERF-NEXT: sw a3, 140(a0) +; XANDESPERF-NEXT: sw a1, 140(a0) ; XANDESPERF-NEXT: ret entry: %add = add nsw i32 %a, 5 @@ -71,38 +71,38 @@ entry: define void @add_shl_moreOneUse_inStore_addexceedsign12(ptr %array1, i32 %a, i32 %b) { ; NO-ZBA-LABEL: add_shl_moreOneUse_inStore_addexceedsign12: ; NO-ZBA: # %bb.0: # %entry -; NO-ZBA-NEXT: addi a3, a1, 2047 +; NO-ZBA-NEXT: slli a3, a1, 2 ; NO-ZBA-NEXT: lui a4, 2 -; NO-ZBA-NEXT: slli a1, a1, 
2 -; NO-ZBA-NEXT: addi a3, a3, 1 -; NO-ZBA-NEXT: add a0, a0, a1 +; NO-ZBA-NEXT: add a0, a0, a3 +; NO-ZBA-NEXT: addi a1, a1, 2047 ; NO-ZBA-NEXT: add a0, a0, a4 +; NO-ZBA-NEXT: addi a1, a1, 1 ; NO-ZBA-NEXT: sw a2, 0(a0) -; NO-ZBA-NEXT: sw a3, 4(a0) +; NO-ZBA-NEXT: sw a1, 4(a0) ; NO-ZBA-NEXT: sw a2, 120(a0) ; NO-ZBA-NEXT: ret ; ; ZBA-LABEL: add_shl_moreOneUse_inStore_addexceedsign12: ; ZBA: # %bb.0: # %entry -; ZBA-NEXT: addi a3, a1, 2047 -; ZBA-NEXT: lui a4, 2 ; ZBA-NEXT: sh2add a0, a1, a0 -; ZBA-NEXT: addi a3, a3, 1 -; ZBA-NEXT: add a0, a0, a4 +; ZBA-NEXT: lui a3, 2 +; ZBA-NEXT: addi a1, a1, 2047 +; ZBA-NEXT: add a0, a0, a3 +; ZBA-NEXT: addi a1, a1, 1 ; ZBA-NEXT: sw a2, 0(a0) -; ZBA-NEXT: sw a3, 4(a0) +; ZBA-NEXT: sw a1, 4(a0) ; ZBA-NEXT: sw a2, 120(a0) ; ZBA-NEXT: ret ; ; XANDESPERF-LABEL: add_shl_moreOneUse_inStore_addexceedsign12: ; XANDESPERF: # %bb.0: # %entry -; XANDESPERF-NEXT: addi a3, a1, 2047 -; XANDESPERF-NEXT: lui a4, 2 +; XANDESPERF-NEXT: lui a3, 2 ; XANDESPERF-NEXT: nds.lea.w a0, a0, a1 -; XANDESPERF-NEXT: addi a3, a3, 1 -; XANDESPERF-NEXT: add a0, a0, a4 +; XANDESPERF-NEXT: addi a1, a1, 2047 +; XANDESPERF-NEXT: add a0, a0, a3 +; XANDESPERF-NEXT: addi a1, a1, 1 ; XANDESPERF-NEXT: sw a2, 0(a0) -; XANDESPERF-NEXT: sw a3, 4(a0) +; XANDESPERF-NEXT: sw a1, 4(a0) ; XANDESPERF-NEXT: sw a2, 120(a0) ; XANDESPERF-NEXT: ret entry: @@ -184,8 +184,8 @@ define void @add_shl_moreOneUse_inSelect_addexceedsign12(ptr %array1, i32 %a, i3 ; NO-ZBA-NEXT: # %bb.1: # %entry ; NO-ZBA-NEXT: mv a5, a2 ; NO-ZBA-NEXT: .LBB4_2: # %entry -; NO-ZBA-NEXT: lui a2, 2 ; NO-ZBA-NEXT: slli a1, a1, 2 +; NO-ZBA-NEXT: lui a2, 2 ; NO-ZBA-NEXT: add a0, a0, a1 ; NO-ZBA-NEXT: add a0, a0, a2 ; NO-ZBA-NEXT: sw a5, 0(a0) @@ -202,9 +202,9 @@ define void @add_shl_moreOneUse_inSelect_addexceedsign12(ptr %array1, i32 %a, i3 ; ZBA-NEXT: # %bb.1: # %entry ; ZBA-NEXT: mv a5, a2 ; ZBA-NEXT: .LBB4_2: # %entry -; ZBA-NEXT: lui a2, 2 ; ZBA-NEXT: sh2add a0, a1, a0 -; ZBA-NEXT: add a0, a0, a2 +; ZBA-NEXT: lui 
a1, 2 +; ZBA-NEXT: add a0, a0, a1 ; ZBA-NEXT: sw a5, 0(a0) ; ZBA-NEXT: sw a5, 4(a0) ; ZBA-NEXT: sw a4, 120(a0) @@ -243,10 +243,10 @@ entry: define i32 @add_shl_moreOneUse_sh1add(i32 %x) { ; NO-ZBA-LABEL: add_shl_moreOneUse_sh1add: ; NO-ZBA: # %bb.0: -; NO-ZBA-NEXT: ori a1, a0, 1 -; NO-ZBA-NEXT: slli a0, a0, 1 -; NO-ZBA-NEXT: ori a0, a0, 2 -; NO-ZBA-NEXT: add a0, a0, a1 +; NO-ZBA-NEXT: slli a1, a0, 1 +; NO-ZBA-NEXT: ori a0, a0, 1 +; NO-ZBA-NEXT: ori a1, a1, 2 +; NO-ZBA-NEXT: add a0, a1, a0 ; NO-ZBA-NEXT: ret ; ; ZBA-LABEL: add_shl_moreOneUse_sh1add: @@ -269,10 +269,10 @@ define i32 @add_shl_moreOneUse_sh1add(i32 %x) { define i32 @add_shl_moreOneUse_sh2add(i32 %x) { ; NO-ZBA-LABEL: add_shl_moreOneUse_sh2add: ; NO-ZBA: # %bb.0: -; NO-ZBA-NEXT: ori a1, a0, 1 -; NO-ZBA-NEXT: slli a0, a0, 2 -; NO-ZBA-NEXT: ori a0, a0, 4 -; NO-ZBA-NEXT: add a0, a0, a1 +; NO-ZBA-NEXT: slli a1, a0, 2 +; NO-ZBA-NEXT: ori a0, a0, 1 +; NO-ZBA-NEXT: ori a1, a1, 4 +; NO-ZBA-NEXT: add a0, a1, a0 ; NO-ZBA-NEXT: ret ; ; ZBA-LABEL: add_shl_moreOneUse_sh2add: @@ -295,10 +295,10 @@ define i32 @add_shl_moreOneUse_sh2add(i32 %x) { define i32 @add_shl_moreOneUse_sh3add(i32 %x) { ; NO-ZBA-LABEL: add_shl_moreOneUse_sh3add: ; NO-ZBA: # %bb.0: -; NO-ZBA-NEXT: ori a1, a0, 1 -; NO-ZBA-NEXT: slli a0, a0, 3 -; NO-ZBA-NEXT: ori a0, a0, 8 -; NO-ZBA-NEXT: add a0, a0, a1 +; NO-ZBA-NEXT: slli a1, a0, 3 +; NO-ZBA-NEXT: ori a0, a0, 1 +; NO-ZBA-NEXT: ori a1, a1, 8 +; NO-ZBA-NEXT: add a0, a1, a0 ; NO-ZBA-NEXT: ret ; ; ZBA-LABEL: add_shl_moreOneUse_sh3add: @@ -321,10 +321,10 @@ define i32 @add_shl_moreOneUse_sh3add(i32 %x) { define i32 @add_shl_moreOneUse_sh4add(i32 %x) { ; RV32-LABEL: add_shl_moreOneUse_sh4add: ; RV32: # %bb.0: -; RV32-NEXT: ori a1, a0, 1 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: ori a0, a0, 16 -; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: ori a0, a0, 1 +; RV32-NEXT: ori a1, a1, 16 +; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: ret %or = or i32 %x, 1 %mul = shl i32 %or, 4 diff --git 
a/llvm/test/CodeGen/RISCV/addc-adde-sube-subc.ll b/llvm/test/CodeGen/RISCV/addc-adde-sube-subc.ll index 5fd8261e27cc3..1c3e2e5119a7d 100644 --- a/llvm/test/CodeGen/RISCV/addc-adde-sube-subc.ll +++ b/llvm/test/CodeGen/RISCV/addc-adde-sube-subc.ll @@ -7,8 +7,8 @@ define i64 @addc_adde(i64 %a, i64 %b) nounwind { ; RV32I-LABEL: addc_adde: ; RV32I: # %bb.0: -; RV32I-NEXT: add a1, a1, a3 ; RV32I-NEXT: add a2, a0, a2 +; RV32I-NEXT: add a1, a1, a3 ; RV32I-NEXT: sltu a0, a2, a0 ; RV32I-NEXT: add a1, a1, a0 ; RV32I-NEXT: mv a0, a2 diff --git a/llvm/test/CodeGen/RISCV/addcarry.ll b/llvm/test/CodeGen/RISCV/addcarry.ll index ff0d1e75c746c..b092ffb2044da 100644 --- a/llvm/test/CodeGen/RISCV/addcarry.ll +++ b/llvm/test/CodeGen/RISCV/addcarry.ll @@ -12,18 +12,18 @@ define i64 @addcarry(i64 %x, i64 %y) nounwind { ; RISCV32: # %bb.0: ; RISCV32-NEXT: mul a4, a0, a3 ; RISCV32-NEXT: mulhu a5, a0, a2 -; RISCV32-NEXT: mul a6, a1, a2 -; RISCV32-NEXT: mulhu a7, a0, a3 -; RISCV32-NEXT: mulhu t0, a1, a2 -; RISCV32-NEXT: add t1, a5, a4 -; RISCV32-NEXT: add a7, a7, t0 -; RISCV32-NEXT: add a4, t1, a6 -; RISCV32-NEXT: sltu a5, t1, a5 -; RISCV32-NEXT: sltu a6, a4, t1 -; RISCV32-NEXT: add a5, a7, a5 -; RISCV32-NEXT: add a5, a5, a6 -; RISCV32-NEXT: mul a6, a1, a3 -; RISCV32-NEXT: add a5, a5, a6 +; RISCV32-NEXT: mulhu a6, a0, a3 +; RISCV32-NEXT: mulhu a7, a1, a2 +; RISCV32-NEXT: mul t0, a1, a2 +; RISCV32-NEXT: mul t1, a1, a3 +; RISCV32-NEXT: add a6, a6, a7 +; RISCV32-NEXT: add a7, a5, a4 +; RISCV32-NEXT: add a4, a7, t0 +; RISCV32-NEXT: sltu a5, a7, a5 +; RISCV32-NEXT: sltu a7, a4, a7 +; RISCV32-NEXT: add a5, a6, a5 +; RISCV32-NEXT: add a5, a5, a7 +; RISCV32-NEXT: add a5, a5, t1 ; RISCV32-NEXT: bgez a1, .LBB0_2 ; RISCV32-NEXT: # %bb.1: ; RISCV32-NEXT: sub a5, a5, a2 @@ -32,12 +32,12 @@ define i64 @addcarry(i64 %x, i64 %y) nounwind { ; RISCV32-NEXT: # %bb.3: ; RISCV32-NEXT: sub a5, a5, a0 ; RISCV32-NEXT: .LBB0_4: +; RISCV32-NEXT: mul a0, a0, a2 ; RISCV32-NEXT: slli a5, a5, 30 ; RISCV32-NEXT: srli a1, 
a4, 2 ; RISCV32-NEXT: slli a4, a4, 30 -; RISCV32-NEXT: mul a0, a0, a2 -; RISCV32-NEXT: or a1, a5, a1 ; RISCV32-NEXT: srli a0, a0, 2 +; RISCV32-NEXT: or a1, a5, a1 ; RISCV32-NEXT: or a0, a4, a0 ; RISCV32-NEXT: ret %tmp = call i64 @llvm.smul.fix.i64(i64 %x, i64 %y, i32 2); @@ -49,10 +49,10 @@ define { i32, i32, i1 } @addcarry_2x32(i32 %x0, i32 %x1, i32 %y0, i32 %y1) nounw ; RISCV32-LABEL: addcarry_2x32: ; RISCV32: # %bb.0: ; RISCV32-NEXT: add a3, a1, a3 -; RISCV32-NEXT: add a4, a2, a4 ; RISCV32-NEXT: sltu a1, a3, a1 -; RISCV32-NEXT: sltu a2, a4, a2 +; RISCV32-NEXT: add a4, a2, a4 ; RISCV32-NEXT: add a1, a4, a1 +; RISCV32-NEXT: sltu a2, a4, a2 ; RISCV32-NEXT: sltu a4, a1, a4 ; RISCV32-NEXT: or a2, a2, a4 ; RISCV32-NEXT: sw a3, 0(a0) diff --git a/llvm/test/CodeGen/RISCV/addimm-mulimm.ll b/llvm/test/CodeGen/RISCV/addimm-mulimm.ll index 3b2cab2b66303..f0e8798cd6a04 100644 --- a/llvm/test/CodeGen/RISCV/addimm-mulimm.ll +++ b/llvm/test/CodeGen/RISCV/addimm-mulimm.ll @@ -53,17 +53,17 @@ define i64 @add_mul_combine_accept_a3(i64 %x) { ; RV32IMB-LABEL: add_mul_combine_accept_a3: ; RV32IMB: # %bb.0: ; RV32IMB-NEXT: li a2, 29 -; RV32IMB-NEXT: sh1add a3, a1, a1 -; RV32IMB-NEXT: slli a1, a1, 5 -; RV32IMB-NEXT: sub a1, a1, a3 -; RV32IMB-NEXT: sh1add a3, a0, a0 ; RV32IMB-NEXT: mulhu a2, a0, a2 +; RV32IMB-NEXT: sh1add a3, a0, a0 ; RV32IMB-NEXT: slli a0, a0, 5 +; RV32IMB-NEXT: sh1add a4, a1, a1 ; RV32IMB-NEXT: sub a3, a0, a3 -; RV32IMB-NEXT: add a1, a2, a1 +; RV32IMB-NEXT: slli a1, a1, 5 ; RV32IMB-NEXT: addi a0, a3, 1073 -; RV32IMB-NEXT: sltu a2, a0, a3 -; RV32IMB-NEXT: add a1, a1, a2 +; RV32IMB-NEXT: sub a1, a1, a4 +; RV32IMB-NEXT: sltu a3, a0, a3 +; RV32IMB-NEXT: add a1, a1, a3 +; RV32IMB-NEXT: add a1, a2, a1 ; RV32IMB-NEXT: ret ; ; RV64IMB-LABEL: add_mul_combine_accept_a3: @@ -83,9 +83,9 @@ define i32 @add_mul_combine_accept_b1(i32 %x) { ; RV32IMB: # %bb.0: ; RV32IMB-NEXT: sh3add a1, a0, a0 ; RV32IMB-NEXT: slli a0, a0, 5 +; RV32IMB-NEXT: lui a2, 50 ; RV32IMB-NEXT: sub a0, a0, a1 
-; RV32IMB-NEXT: lui a1, 50 -; RV32IMB-NEXT: addi a1, a1, 1119 +; RV32IMB-NEXT: addi a1, a2, 1119 ; RV32IMB-NEXT: add a0, a0, a1 ; RV32IMB-NEXT: ret ; @@ -93,9 +93,9 @@ define i32 @add_mul_combine_accept_b1(i32 %x) { ; RV64IMB: # %bb.0: ; RV64IMB-NEXT: sh3add a1, a0, a0 ; RV64IMB-NEXT: slli a0, a0, 5 +; RV64IMB-NEXT: lui a2, 50 ; RV64IMB-NEXT: sub a0, a0, a1 -; RV64IMB-NEXT: lui a1, 50 -; RV64IMB-NEXT: addi a1, a1, 1119 +; RV64IMB-NEXT: addi a1, a2, 1119 ; RV64IMB-NEXT: addw a0, a0, a1 ; RV64IMB-NEXT: ret %tmp0 = add i32 %x, 8953 @@ -108,9 +108,9 @@ define signext i32 @add_mul_combine_accept_b2(i32 signext %x) { ; RV32IMB: # %bb.0: ; RV32IMB-NEXT: sh3add a1, a0, a0 ; RV32IMB-NEXT: slli a0, a0, 5 +; RV32IMB-NEXT: lui a2, 50 ; RV32IMB-NEXT: sub a0, a0, a1 -; RV32IMB-NEXT: lui a1, 50 -; RV32IMB-NEXT: addi a1, a1, 1119 +; RV32IMB-NEXT: addi a1, a2, 1119 ; RV32IMB-NEXT: add a0, a0, a1 ; RV32IMB-NEXT: ret ; @@ -118,9 +118,9 @@ define signext i32 @add_mul_combine_accept_b2(i32 signext %x) { ; RV64IMB: # %bb.0: ; RV64IMB-NEXT: sh3add a1, a0, a0 ; RV64IMB-NEXT: slli a0, a0, 5 +; RV64IMB-NEXT: lui a2, 50 ; RV64IMB-NEXT: sub a0, a0, a1 -; RV64IMB-NEXT: lui a1, 50 -; RV64IMB-NEXT: addi a1, a1, 1119 +; RV64IMB-NEXT: addi a1, a2, 1119 ; RV64IMB-NEXT: addw a0, a0, a1 ; RV64IMB-NEXT: ret %tmp0 = add i32 %x, 8953 @@ -132,28 +132,28 @@ define i64 @add_mul_combine_accept_b3(i64 %x) { ; RV32IMB-LABEL: add_mul_combine_accept_b3: ; RV32IMB: # %bb.0: ; RV32IMB-NEXT: li a2, 23 -; RV32IMB-NEXT: sh3add a3, a1, a1 -; RV32IMB-NEXT: slli a1, a1, 5 -; RV32IMB-NEXT: sub a1, a1, a3 ; RV32IMB-NEXT: sh3add a3, a0, a0 ; RV32IMB-NEXT: mulhu a2, a0, a2 ; RV32IMB-NEXT: slli a0, a0, 5 ; RV32IMB-NEXT: sub a3, a0, a3 ; RV32IMB-NEXT: lui a0, 50 +; RV32IMB-NEXT: sh3add a4, a1, a1 ; RV32IMB-NEXT: addi a0, a0, 1119 -; RV32IMB-NEXT: add a1, a2, a1 +; RV32IMB-NEXT: slli a1, a1, 5 ; RV32IMB-NEXT: add a0, a3, a0 -; RV32IMB-NEXT: sltu a2, a0, a3 -; RV32IMB-NEXT: add a1, a1, a2 +; RV32IMB-NEXT: sub a1, a1, a4 +; 
RV32IMB-NEXT: sltu a3, a0, a3 +; RV32IMB-NEXT: add a1, a1, a3 +; RV32IMB-NEXT: add a1, a2, a1 ; RV32IMB-NEXT: ret ; ; RV64IMB-LABEL: add_mul_combine_accept_b3: ; RV64IMB: # %bb.0: ; RV64IMB-NEXT: sh3add a1, a0, a0 ; RV64IMB-NEXT: slli a0, a0, 5 +; RV64IMB-NEXT: lui a2, 50 ; RV64IMB-NEXT: sub a0, a0, a1 -; RV64IMB-NEXT: lui a1, 50 -; RV64IMB-NEXT: addi a1, a1, 1119 +; RV64IMB-NEXT: addi a1, a2, 1119 ; RV64IMB-NEXT: add a0, a0, a1 ; RV64IMB-NEXT: ret %tmp0 = add i64 %x, 8953 @@ -207,19 +207,19 @@ define i64 @add_mul_combine_reject_a3(i64 %x) { ; RV32IMB-LABEL: add_mul_combine_reject_a3: ; RV32IMB: # %bb.0: ; RV32IMB-NEXT: li a2, 29 -; RV32IMB-NEXT: sh1add a3, a1, a1 -; RV32IMB-NEXT: slli a1, a1, 5 -; RV32IMB-NEXT: sub a1, a1, a3 ; RV32IMB-NEXT: sh1add a3, a0, a0 ; RV32IMB-NEXT: mulhu a2, a0, a2 ; RV32IMB-NEXT: slli a0, a0, 5 ; RV32IMB-NEXT: sub a3, a0, a3 ; RV32IMB-NEXT: lui a0, 14 +; RV32IMB-NEXT: sh1add a4, a1, a1 ; RV32IMB-NEXT: addi a0, a0, -185 -; RV32IMB-NEXT: add a1, a2, a1 +; RV32IMB-NEXT: slli a1, a1, 5 ; RV32IMB-NEXT: add a0, a3, a0 -; RV32IMB-NEXT: sltu a2, a0, a3 -; RV32IMB-NEXT: add a1, a1, a2 +; RV32IMB-NEXT: sub a1, a1, a4 +; RV32IMB-NEXT: sltu a3, a0, a3 +; RV32IMB-NEXT: add a1, a1, a3 +; RV32IMB-NEXT: add a1, a2, a1 ; RV32IMB-NEXT: ret ; ; RV64IMB-LABEL: add_mul_combine_reject_a3: @@ -278,17 +278,17 @@ define i64 @add_mul_combine_reject_c3(i64 %x) { ; RV32IMB-LABEL: add_mul_combine_reject_c3: ; RV32IMB: # %bb.0: ; RV32IMB-NEXT: li a2, 73 -; RV32IMB-NEXT: sh3add a3, a1, a1 -; RV32IMB-NEXT: sh3add a1, a3, a1 ; RV32IMB-NEXT: sh3add a3, a0, a0 ; RV32IMB-NEXT: mulhu a2, a0, a2 +; RV32IMB-NEXT: sh3add a4, a1, a1 ; RV32IMB-NEXT: sh3add a3, a3, a0 ; RV32IMB-NEXT: lui a0, 18 ; RV32IMB-NEXT: addi a0, a0, -728 -; RV32IMB-NEXT: add a1, a2, a1 +; RV32IMB-NEXT: sh3add a1, a4, a1 ; RV32IMB-NEXT: add a0, a3, a0 -; RV32IMB-NEXT: sltu a2, a0, a3 -; RV32IMB-NEXT: add a1, a1, a2 +; RV32IMB-NEXT: sltu a3, a0, a3 +; RV32IMB-NEXT: add a1, a1, a3 +; RV32IMB-NEXT: add a1, 
a2, a1 ; RV32IMB-NEXT: ret ; ; RV64IMB-LABEL: add_mul_combine_reject_c3: @@ -344,17 +344,17 @@ define i64 @add_mul_combine_reject_d3(i64 %x) { ; RV32IMB-LABEL: add_mul_combine_reject_d3: ; RV32IMB: # %bb.0: ; RV32IMB-NEXT: li a2, 192 -; RV32IMB-NEXT: sh1add a1, a1, a1 ; RV32IMB-NEXT: mulhu a2, a0, a2 ; RV32IMB-NEXT: sh1add a0, a0, a0 +; RV32IMB-NEXT: sh1add a1, a1, a1 +; RV32IMB-NEXT: lui a3, 47 +; RV32IMB-NEXT: slli a4, a0, 6 +; RV32IMB-NEXT: addi a0, a3, -512 +; RV32IMB-NEXT: add a0, a4, a0 ; RV32IMB-NEXT: slli a1, a1, 6 +; RV32IMB-NEXT: sltu a3, a0, a4 +; RV32IMB-NEXT: add a1, a1, a3 ; RV32IMB-NEXT: add a1, a2, a1 -; RV32IMB-NEXT: lui a2, 47 -; RV32IMB-NEXT: slli a3, a0, 6 -; RV32IMB-NEXT: addi a0, a2, -512 -; RV32IMB-NEXT: add a0, a3, a0 -; RV32IMB-NEXT: sltu a2, a0, a3 -; RV32IMB-NEXT: add a1, a1, a2 ; RV32IMB-NEXT: ret ; ; RV64IMB-LABEL: add_mul_combine_reject_d3: @@ -414,19 +414,19 @@ define i64 @add_mul_combine_reject_e3(i64 %x) { ; RV32IMB-LABEL: add_mul_combine_reject_e3: ; RV32IMB: # %bb.0: ; RV32IMB-NEXT: li a2, 29 -; RV32IMB-NEXT: sh1add a3, a1, a1 -; RV32IMB-NEXT: slli a1, a1, 5 -; RV32IMB-NEXT: sub a1, a1, a3 ; RV32IMB-NEXT: sh1add a3, a0, a0 ; RV32IMB-NEXT: mulhu a2, a0, a2 ; RV32IMB-NEXT: slli a0, a0, 5 ; RV32IMB-NEXT: sub a3, a0, a3 ; RV32IMB-NEXT: lui a0, 14 +; RV32IMB-NEXT: sh1add a4, a1, a1 ; RV32IMB-NEXT: addi a0, a0, -185 -; RV32IMB-NEXT: add a1, a2, a1 +; RV32IMB-NEXT: slli a1, a1, 5 ; RV32IMB-NEXT: add a0, a3, a0 -; RV32IMB-NEXT: sltu a2, a0, a3 -; RV32IMB-NEXT: add a1, a1, a2 +; RV32IMB-NEXT: sub a1, a1, a4 +; RV32IMB-NEXT: sltu a3, a0, a3 +; RV32IMB-NEXT: add a1, a1, a3 +; RV32IMB-NEXT: add a1, a2, a1 ; RV32IMB-NEXT: ret ; ; RV64IMB-LABEL: add_mul_combine_reject_e3: @@ -491,19 +491,19 @@ define i64 @add_mul_combine_reject_f3(i64 %x) { ; RV32IMB-LABEL: add_mul_combine_reject_f3: ; RV32IMB: # %bb.0: ; RV32IMB-NEXT: li a2, 29 -; RV32IMB-NEXT: sh1add a3, a1, a1 -; RV32IMB-NEXT: slli a1, a1, 5 -; RV32IMB-NEXT: sub a1, a1, a3 ; RV32IMB-NEXT: 
sh1add a3, a0, a0 ; RV32IMB-NEXT: mulhu a2, a0, a2 ; RV32IMB-NEXT: slli a0, a0, 5 ; RV32IMB-NEXT: sub a3, a0, a3 ; RV32IMB-NEXT: lui a0, 14 +; RV32IMB-NEXT: sh1add a4, a1, a1 ; RV32IMB-NEXT: addi a0, a0, -145 -; RV32IMB-NEXT: add a1, a2, a1 +; RV32IMB-NEXT: slli a1, a1, 5 ; RV32IMB-NEXT: add a0, a3, a0 -; RV32IMB-NEXT: sltu a2, a0, a3 -; RV32IMB-NEXT: add a1, a1, a2 +; RV32IMB-NEXT: sub a1, a1, a4 +; RV32IMB-NEXT: sltu a3, a0, a3 +; RV32IMB-NEXT: add a1, a1, a3 +; RV32IMB-NEXT: add a1, a2, a1 ; RV32IMB-NEXT: ret ; ; RV64IMB-LABEL: add_mul_combine_reject_f3: @@ -565,17 +565,17 @@ define i64 @add_mul_combine_reject_g3(i64 %x) { ; RV32IMB-LABEL: add_mul_combine_reject_g3: ; RV32IMB: # %bb.0: ; RV32IMB-NEXT: li a2, 73 -; RV32IMB-NEXT: sh3add a3, a1, a1 -; RV32IMB-NEXT: sh3add a1, a3, a1 ; RV32IMB-NEXT: sh3add a3, a0, a0 ; RV32IMB-NEXT: mulhu a2, a0, a2 +; RV32IMB-NEXT: sh3add a4, a1, a1 ; RV32IMB-NEXT: sh3add a3, a3, a0 ; RV32IMB-NEXT: lui a0, 2 ; RV32IMB-NEXT: addi a0, a0, -882 -; RV32IMB-NEXT: add a1, a2, a1 +; RV32IMB-NEXT: sh3add a1, a4, a1 ; RV32IMB-NEXT: add a0, a3, a0 -; RV32IMB-NEXT: sltu a2, a0, a3 -; RV32IMB-NEXT: add a1, a1, a2 +; RV32IMB-NEXT: sltu a3, a0, a3 +; RV32IMB-NEXT: add a1, a1, a3 +; RV32IMB-NEXT: add a1, a2, a1 ; RV32IMB-NEXT: ret ; ; RV64IMB-LABEL: add_mul_combine_reject_g3: @@ -595,15 +595,15 @@ define i64 @add_mul_combine_infinite_loop(i64 %x) { ; RV32IMB-LABEL: add_mul_combine_infinite_loop: ; RV32IMB: # %bb.0: ; RV32IMB-NEXT: li a2, 24 -; RV32IMB-NEXT: sh1add a1, a1, a1 +; RV32IMB-NEXT: mulhu a2, a0, a2 ; RV32IMB-NEXT: sh1add a3, a0, a0 -; RV32IMB-NEXT: mulhu a0, a0, a2 -; RV32IMB-NEXT: li a2, 1 -; RV32IMB-NEXT: sh3add a1, a1, a0 -; RV32IMB-NEXT: slli a4, a3, 3 -; RV32IMB-NEXT: slli a2, a2, 11 -; RV32IMB-NEXT: sh3add a0, a3, a2 -; RV32IMB-NEXT: sltu a2, a0, a4 +; RV32IMB-NEXT: li a0, 1 +; RV32IMB-NEXT: sh1add a1, a1, a1 +; RV32IMB-NEXT: slli a0, a0, 11 +; RV32IMB-NEXT: sh3add a0, a3, a0 +; RV32IMB-NEXT: sh3add a1, a1, a2 +; RV32IMB-NEXT: 
slli a3, a3, 3 +; RV32IMB-NEXT: sltu a2, a0, a3 ; RV32IMB-NEXT: add a1, a1, a2 ; RV32IMB-NEXT: ret ; @@ -622,8 +622,8 @@ define i64 @add_mul_combine_infinite_loop(i64 %x) { define i32 @mul3000_add8990_a(i32 %x) { ; RV32IMB-LABEL: mul3000_add8990_a: ; RV32IMB: # %bb.0: -; RV32IMB-NEXT: addi a0, a0, 3 ; RV32IMB-NEXT: lui a1, 1 +; RV32IMB-NEXT: addi a0, a0, 3 ; RV32IMB-NEXT: addi a1, a1, -1096 ; RV32IMB-NEXT: mul a0, a0, a1 ; RV32IMB-NEXT: addi a0, a0, -10 @@ -631,8 +631,8 @@ define i32 @mul3000_add8990_a(i32 %x) { ; ; RV64IMB-LABEL: mul3000_add8990_a: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addi a0, a0, 3 ; RV64IMB-NEXT: lui a1, 1 +; RV64IMB-NEXT: addi a0, a0, 3 ; RV64IMB-NEXT: addi a1, a1, -1096 ; RV64IMB-NEXT: mul a0, a0, a1 ; RV64IMB-NEXT: addiw a0, a0, -10 @@ -645,8 +645,8 @@ define i32 @mul3000_add8990_a(i32 %x) { define signext i32 @mul3000_add8990_b(i32 signext %x) { ; RV32IMB-LABEL: mul3000_add8990_b: ; RV32IMB: # %bb.0: -; RV32IMB-NEXT: addi a0, a0, 3 ; RV32IMB-NEXT: lui a1, 1 +; RV32IMB-NEXT: addi a0, a0, 3 ; RV32IMB-NEXT: addi a1, a1, -1096 ; RV32IMB-NEXT: mul a0, a0, a1 ; RV32IMB-NEXT: addi a0, a0, -10 @@ -654,8 +654,8 @@ define signext i32 @mul3000_add8990_b(i32 signext %x) { ; ; RV64IMB-LABEL: mul3000_add8990_b: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addi a0, a0, 3 ; RV64IMB-NEXT: lui a1, 1 +; RV64IMB-NEXT: addi a0, a0, 3 ; RV64IMB-NEXT: addi a1, a1, -1096 ; RV64IMB-NEXT: mul a0, a0, a1 ; RV64IMB-NEXT: addiw a0, a0, -10 @@ -670,21 +670,21 @@ define i64 @mul3000_add8990_c(i64 %x) { ; RV32IMB: # %bb.0: ; RV32IMB-NEXT: lui a2, 1 ; RV32IMB-NEXT: addi a2, a2, -1096 +; RV32IMB-NEXT: mul a3, a0, a2 ; RV32IMB-NEXT: mul a1, a1, a2 -; RV32IMB-NEXT: mulhu a3, a0, a2 -; RV32IMB-NEXT: mul a2, a0, a2 +; RV32IMB-NEXT: mulhu a2, a0, a2 ; RV32IMB-NEXT: lui a0, 2 ; RV32IMB-NEXT: addi a0, a0, 798 -; RV32IMB-NEXT: add a1, a3, a1 -; RV32IMB-NEXT: add a0, a2, a0 -; RV32IMB-NEXT: sltu a2, a0, a2 +; RV32IMB-NEXT: add a0, a3, a0 +; RV32IMB-NEXT: add a1, a2, a1 +; RV32IMB-NEXT: sltu 
a2, a0, a3 ; RV32IMB-NEXT: add a1, a1, a2 ; RV32IMB-NEXT: ret ; ; RV64IMB-LABEL: mul3000_add8990_c: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addi a0, a0, 3 ; RV64IMB-NEXT: lui a1, 1 +; RV64IMB-NEXT: addi a0, a0, 3 ; RV64IMB-NEXT: addi a1, a1, -1096 ; RV64IMB-NEXT: mul a0, a0, a1 ; RV64IMB-NEXT: addi a0, a0, -10 @@ -697,8 +697,8 @@ define i64 @mul3000_add8990_c(i64 %x) { define i32 @mul3000_sub8990_a(i32 %x) { ; RV32IMB-LABEL: mul3000_sub8990_a: ; RV32IMB: # %bb.0: -; RV32IMB-NEXT: addi a0, a0, -3 ; RV32IMB-NEXT: lui a1, 1 +; RV32IMB-NEXT: addi a0, a0, -3 ; RV32IMB-NEXT: addi a1, a1, -1096 ; RV32IMB-NEXT: mul a0, a0, a1 ; RV32IMB-NEXT: addi a0, a0, 10 @@ -706,8 +706,8 @@ define i32 @mul3000_sub8990_a(i32 %x) { ; ; RV64IMB-LABEL: mul3000_sub8990_a: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addi a0, a0, -3 ; RV64IMB-NEXT: lui a1, 1 +; RV64IMB-NEXT: addi a0, a0, -3 ; RV64IMB-NEXT: addi a1, a1, -1096 ; RV64IMB-NEXT: mul a0, a0, a1 ; RV64IMB-NEXT: addiw a0, a0, 10 @@ -720,8 +720,8 @@ define i32 @mul3000_sub8990_a(i32 %x) { define signext i32 @mul3000_sub8990_b(i32 signext %x) { ; RV32IMB-LABEL: mul3000_sub8990_b: ; RV32IMB: # %bb.0: -; RV32IMB-NEXT: addi a0, a0, -3 ; RV32IMB-NEXT: lui a1, 1 +; RV32IMB-NEXT: addi a0, a0, -3 ; RV32IMB-NEXT: addi a1, a1, -1096 ; RV32IMB-NEXT: mul a0, a0, a1 ; RV32IMB-NEXT: addi a0, a0, 10 @@ -729,8 +729,8 @@ define signext i32 @mul3000_sub8990_b(i32 signext %x) { ; ; RV64IMB-LABEL: mul3000_sub8990_b: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addi a0, a0, -3 ; RV64IMB-NEXT: lui a1, 1 +; RV64IMB-NEXT: addi a0, a0, -3 ; RV64IMB-NEXT: addi a1, a1, -1096 ; RV64IMB-NEXT: mul a0, a0, a1 ; RV64IMB-NEXT: addiw a0, a0, 10 @@ -745,22 +745,22 @@ define i64 @mul3000_sub8990_c(i64 %x) { ; RV32IMB: # %bb.0: ; RV32IMB-NEXT: lui a2, 1 ; RV32IMB-NEXT: addi a2, a2, -1096 +; RV32IMB-NEXT: mul a3, a0, a2 ; RV32IMB-NEXT: mul a1, a1, a2 -; RV32IMB-NEXT: mulhu a3, a0, a2 -; RV32IMB-NEXT: mul a2, a0, a2 +; RV32IMB-NEXT: mulhu a2, a0, a2 ; RV32IMB-NEXT: lui a0, 1048574 ; 
RV32IMB-NEXT: addi a0, a0, -798 -; RV32IMB-NEXT: add a1, a3, a1 -; RV32IMB-NEXT: add a0, a2, a0 -; RV32IMB-NEXT: sltu a2, a0, a2 +; RV32IMB-NEXT: add a0, a3, a0 +; RV32IMB-NEXT: add a1, a2, a1 +; RV32IMB-NEXT: sltu a2, a0, a3 ; RV32IMB-NEXT: add a1, a1, a2 ; RV32IMB-NEXT: addi a1, a1, -1 ; RV32IMB-NEXT: ret ; ; RV64IMB-LABEL: mul3000_sub8990_c: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addi a0, a0, -3 ; RV64IMB-NEXT: lui a1, 1 +; RV64IMB-NEXT: addi a0, a0, -3 ; RV64IMB-NEXT: addi a1, a1, -1096 ; RV64IMB-NEXT: mul a0, a0, a1 ; RV64IMB-NEXT: addi a0, a0, 10 @@ -773,8 +773,8 @@ define i64 @mul3000_sub8990_c(i64 %x) { define i32 @mulneg3000_add8990_a(i32 %x) { ; RV32IMB-LABEL: mulneg3000_add8990_a: ; RV32IMB: # %bb.0: -; RV32IMB-NEXT: addi a0, a0, -3 ; RV32IMB-NEXT: lui a1, 1048575 +; RV32IMB-NEXT: addi a0, a0, -3 ; RV32IMB-NEXT: addi a1, a1, 1096 ; RV32IMB-NEXT: mul a0, a0, a1 ; RV32IMB-NEXT: addi a0, a0, -10 @@ -782,8 +782,8 @@ define i32 @mulneg3000_add8990_a(i32 %x) { ; ; RV64IMB-LABEL: mulneg3000_add8990_a: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addi a0, a0, -3 ; RV64IMB-NEXT: lui a1, 1048575 +; RV64IMB-NEXT: addi a0, a0, -3 ; RV64IMB-NEXT: addi a1, a1, 1096 ; RV64IMB-NEXT: mul a0, a0, a1 ; RV64IMB-NEXT: addiw a0, a0, -10 @@ -796,8 +796,8 @@ define i32 @mulneg3000_add8990_a(i32 %x) { define signext i32 @mulneg3000_add8990_b(i32 signext %x) { ; RV32IMB-LABEL: mulneg3000_add8990_b: ; RV32IMB: # %bb.0: -; RV32IMB-NEXT: addi a0, a0, -3 ; RV32IMB-NEXT: lui a1, 1048575 +; RV32IMB-NEXT: addi a0, a0, -3 ; RV32IMB-NEXT: addi a1, a1, 1096 ; RV32IMB-NEXT: mul a0, a0, a1 ; RV32IMB-NEXT: addi a0, a0, -10 @@ -805,8 +805,8 @@ define signext i32 @mulneg3000_add8990_b(i32 signext %x) { ; ; RV64IMB-LABEL: mulneg3000_add8990_b: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addi a0, a0, -3 ; RV64IMB-NEXT: lui a1, 1048575 +; RV64IMB-NEXT: addi a0, a0, -3 ; RV64IMB-NEXT: addi a1, a1, 1096 ; RV64IMB-NEXT: mul a0, a0, a1 ; RV64IMB-NEXT: addiw a0, a0, -10 @@ -821,22 +821,22 @@ define i64 
@mulneg3000_add8990_c(i64 %x) { ; RV32IMB: # %bb.0: ; RV32IMB-NEXT: lui a2, 1048575 ; RV32IMB-NEXT: addi a2, a2, 1096 -; RV32IMB-NEXT: mul a1, a1, a2 ; RV32IMB-NEXT: mulhu a3, a0, a2 -; RV32IMB-NEXT: mul a2, a0, a2 +; RV32IMB-NEXT: mul a4, a0, a2 +; RV32IMB-NEXT: mul a1, a1, a2 +; RV32IMB-NEXT: lui a2, 2 +; RV32IMB-NEXT: addi a2, a2, 798 ; RV32IMB-NEXT: sub a3, a3, a0 -; RV32IMB-NEXT: lui a0, 2 -; RV32IMB-NEXT: addi a0, a0, 798 -; RV32IMB-NEXT: add a0, a2, a0 +; RV32IMB-NEXT: add a0, a4, a2 ; RV32IMB-NEXT: add a1, a3, a1 -; RV32IMB-NEXT: sltu a2, a0, a2 +; RV32IMB-NEXT: sltu a2, a0, a4 ; RV32IMB-NEXT: add a1, a1, a2 ; RV32IMB-NEXT: ret ; ; RV64IMB-LABEL: mulneg3000_add8990_c: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addi a0, a0, -3 ; RV64IMB-NEXT: lui a1, 1048575 +; RV64IMB-NEXT: addi a0, a0, -3 ; RV64IMB-NEXT: addi a1, a1, 1096 ; RV64IMB-NEXT: mul a0, a0, a1 ; RV64IMB-NEXT: addi a0, a0, -10 @@ -849,8 +849,8 @@ define i64 @mulneg3000_add8990_c(i64 %x) { define i32 @mulneg3000_sub8990_a(i32 %x) { ; RV32IMB-LABEL: mulneg3000_sub8990_a: ; RV32IMB: # %bb.0: -; RV32IMB-NEXT: addi a0, a0, 3 ; RV32IMB-NEXT: lui a1, 1048575 +; RV32IMB-NEXT: addi a0, a0, 3 ; RV32IMB-NEXT: addi a1, a1, 1096 ; RV32IMB-NEXT: mul a0, a0, a1 ; RV32IMB-NEXT: addi a0, a0, 10 @@ -858,8 +858,8 @@ define i32 @mulneg3000_sub8990_a(i32 %x) { ; ; RV64IMB-LABEL: mulneg3000_sub8990_a: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addi a0, a0, 3 ; RV64IMB-NEXT: lui a1, 1048575 +; RV64IMB-NEXT: addi a0, a0, 3 ; RV64IMB-NEXT: addi a1, a1, 1096 ; RV64IMB-NEXT: mul a0, a0, a1 ; RV64IMB-NEXT: addiw a0, a0, 10 @@ -872,8 +872,8 @@ define i32 @mulneg3000_sub8990_a(i32 %x) { define signext i32 @mulneg3000_sub8990_b(i32 signext %x) { ; RV32IMB-LABEL: mulneg3000_sub8990_b: ; RV32IMB: # %bb.0: -; RV32IMB-NEXT: addi a0, a0, 3 ; RV32IMB-NEXT: lui a1, 1048575 +; RV32IMB-NEXT: addi a0, a0, 3 ; RV32IMB-NEXT: addi a1, a1, 1096 ; RV32IMB-NEXT: mul a0, a0, a1 ; RV32IMB-NEXT: addi a0, a0, 10 @@ -881,8 +881,8 @@ define signext i32 
@mulneg3000_sub8990_b(i32 signext %x) { ; ; RV64IMB-LABEL: mulneg3000_sub8990_b: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addi a0, a0, 3 ; RV64IMB-NEXT: lui a1, 1048575 +; RV64IMB-NEXT: addi a0, a0, 3 ; RV64IMB-NEXT: addi a1, a1, 1096 ; RV64IMB-NEXT: mul a0, a0, a1 ; RV64IMB-NEXT: addiw a0, a0, 10 @@ -897,23 +897,23 @@ define i64 @mulneg3000_sub8990_c(i64 %x) { ; RV32IMB: # %bb.0: ; RV32IMB-NEXT: lui a2, 1048575 ; RV32IMB-NEXT: addi a2, a2, 1096 -; RV32IMB-NEXT: mul a1, a1, a2 ; RV32IMB-NEXT: mulhu a3, a0, a2 -; RV32IMB-NEXT: mul a2, a0, a2 +; RV32IMB-NEXT: mul a4, a0, a2 +; RV32IMB-NEXT: mul a1, a1, a2 +; RV32IMB-NEXT: lui a2, 1048574 +; RV32IMB-NEXT: addi a2, a2, -798 ; RV32IMB-NEXT: sub a3, a3, a0 -; RV32IMB-NEXT: lui a0, 1048574 -; RV32IMB-NEXT: addi a0, a0, -798 -; RV32IMB-NEXT: add a0, a2, a0 +; RV32IMB-NEXT: add a0, a4, a2 ; RV32IMB-NEXT: add a1, a3, a1 -; RV32IMB-NEXT: sltu a2, a0, a2 +; RV32IMB-NEXT: sltu a2, a0, a4 ; RV32IMB-NEXT: add a1, a1, a2 ; RV32IMB-NEXT: addi a1, a1, -1 ; RV32IMB-NEXT: ret ; ; RV64IMB-LABEL: mulneg3000_sub8990_c: ; RV64IMB: # %bb.0: -; RV64IMB-NEXT: addi a0, a0, 3 ; RV64IMB-NEXT: lui a1, 1048575 +; RV64IMB-NEXT: addi a0, a0, 3 ; RV64IMB-NEXT: addi a1, a1, 1096 ; RV64IMB-NEXT: mul a0, a0, a1 ; RV64IMB-NEXT: addi a0, a0, 10 @@ -948,14 +948,14 @@ define i1 @pr53831(i32 %x) { define i64 @sh2add_uw(i64 signext %0, i32 signext %1) { ; RV32IMB-LABEL: sh2add_uw: ; RV32IMB: # %bb.0: # %entry -; RV32IMB-NEXT: srli a3, a2, 27 -; RV32IMB-NEXT: slli a2, a2, 5 +; RV32IMB-NEXT: slli a3, a2, 5 ; RV32IMB-NEXT: srli a4, a0, 29 ; RV32IMB-NEXT: sh3add a1, a1, a4 -; RV32IMB-NEXT: sh3add a0, a0, a2 -; RV32IMB-NEXT: sltu a2, a0, a2 -; RV32IMB-NEXT: add a1, a3, a1 -; RV32IMB-NEXT: add a1, a1, a2 +; RV32IMB-NEXT: sh3add a0, a0, a3 +; RV32IMB-NEXT: srli a2, a2, 27 +; RV32IMB-NEXT: sltu a3, a0, a3 +; RV32IMB-NEXT: add a1, a2, a1 +; RV32IMB-NEXT: add a1, a1, a3 ; RV32IMB-NEXT: ret ; ; RV64IMB-LABEL: sh2add_uw: diff --git a/llvm/test/CodeGen/RISCV/alu64.ll 
b/llvm/test/CodeGen/RISCV/alu64.ll index c7938a718de70..1c478166f83a7 100644 --- a/llvm/test/CodeGen/RISCV/alu64.ll +++ b/llvm/test/CodeGen/RISCV/alu64.ll @@ -171,8 +171,8 @@ define i64 @add(i64 %a, i64 %b) nounwind { ; ; RV32I-LABEL: add: ; RV32I: # %bb.0: -; RV32I-NEXT: add a1, a1, a3 ; RV32I-NEXT: add a2, a0, a2 +; RV32I-NEXT: add a1, a1, a3 ; RV32I-NEXT: sltu a0, a2, a0 ; RV32I-NEXT: add a1, a1, a0 ; RV32I-NEXT: mv a0, a2 @@ -206,21 +206,21 @@ define i64 @sll(i64 %a, i64 %b) nounwind { ; ; RV32I-LABEL: sll: ; RV32I: # %bb.0: -; RV32I-NEXT: addi a4, a2, -32 -; RV32I-NEXT: sll a3, a0, a2 -; RV32I-NEXT: bltz a4, .LBB11_2 +; RV32I-NEXT: addi a3, a2, -32 +; RV32I-NEXT: sll a4, a0, a2 +; RV32I-NEXT: bltz a3, .LBB11_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: j .LBB11_3 ; RV32I-NEXT: .LBB11_2: -; RV32I-NEXT: sll a1, a1, a2 -; RV32I-NEXT: not a2, a2 +; RV32I-NEXT: not a5, a2 ; RV32I-NEXT: srli a0, a0, 1 -; RV32I-NEXT: srl a0, a0, a2 +; RV32I-NEXT: sll a1, a1, a2 +; RV32I-NEXT: srl a0, a0, a5 ; RV32I-NEXT: or a1, a1, a0 ; RV32I-NEXT: .LBB11_3: -; RV32I-NEXT: srai a0, a4, 31 -; RV32I-NEXT: and a0, a0, a3 +; RV32I-NEXT: srai a0, a3, 31 +; RV32I-NEXT: and a0, a0, a4 ; RV32I-NEXT: ret %1 = shl i64 %a, %b ret i64 %1 @@ -300,10 +300,10 @@ define i64 @srl(i64 %a, i64 %b) nounwind { ; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: j .LBB15_3 ; RV32I-NEXT: .LBB15_2: -; RV32I-NEXT: srl a0, a0, a2 -; RV32I-NEXT: not a2, a2 +; RV32I-NEXT: not a5, a2 ; RV32I-NEXT: slli a1, a1, 1 -; RV32I-NEXT: sll a1, a1, a2 +; RV32I-NEXT: srl a0, a0, a2 +; RV32I-NEXT: sll a1, a1, a5 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: .LBB15_3: ; RV32I-NEXT: srai a1, a4, 31 @@ -331,10 +331,10 @@ define i64 @sra(i64 %a, i64 %b) nounwind { ; RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB16_2: -; RV32I-NEXT: srl a0, a0, a2 -; RV32I-NEXT: not a2, a2 +; RV32I-NEXT: not a4, a2 ; RV32I-NEXT: slli a3, a3, 1 -; RV32I-NEXT: sll a2, a3, a2 +; RV32I-NEXT: srl a0, a0, a2 +; 
RV32I-NEXT: sll a2, a3, a4 ; RV32I-NEXT: or a0, a0, a2 ; RV32I-NEXT: ret %1 = ashr i64 %a, %b diff --git a/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll b/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll index 4f036d3c991b9..5bd1edb783fce 100644 --- a/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll +++ b/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll @@ -7,8 +7,8 @@ define i1 @test1(i64 %x) { ; RV32: # %bb.0: ; RV32-NEXT: slli a2, a1, 2 ; RV32-NEXT: srli a0, a0, 30 -; RV32-NEXT: srai a1, a1, 30 ; RV32-NEXT: or a0, a0, a2 +; RV32-NEXT: srai a1, a1, 30 ; RV32-NEXT: xori a0, a0, -2 ; RV32-NEXT: not a1, a1 ; RV32-NEXT: or a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/arith-with-overflow.ll b/llvm/test/CodeGen/RISCV/arith-with-overflow.ll index 551d8864033f3..cfedfcda4f599 100644 --- a/llvm/test/CodeGen/RISCV/arith-with-overflow.ll +++ b/llvm/test/CodeGen/RISCV/arith-with-overflow.ll @@ -27,11 +27,11 @@ entry: define i1 @ssub(i32 %a, i32 %b, ptr %c) nounwind { ; RV32I-LABEL: ssub: ; RV32I: # %bb.0: # %entry -; RV32I-NEXT: sgtz a3, a1 -; RV32I-NEXT: sub a1, a0, a1 -; RV32I-NEXT: slt a0, a1, a0 -; RV32I-NEXT: xor a0, a3, a0 -; RV32I-NEXT: sw a1, 0(a2) +; RV32I-NEXT: sub a3, a0, a1 +; RV32I-NEXT: sgtz a1, a1 +; RV32I-NEXT: slt a0, a3, a0 +; RV32I-NEXT: xor a0, a1, a0 +; RV32I-NEXT: sw a3, 0(a2) ; RV32I-NEXT: ret entry: %x = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll index d427b4435d37d..0b4d65c4565fd 100644 --- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll +++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll @@ -36,8 +36,8 @@ define void @cmpxchg_and_branch1(ptr %ptr, i32 signext %cmp, i32 signext %val) n ; ZACAS: # %bb.0: # %entry ; ZACAS-NEXT: .LBB0_1: # %do_cmpxchg ; ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 -; ZACAS-NEXT: fence rw, rw ; ZACAS-NEXT: mv a3, a1 +; ZACAS-NEXT: fence rw, rw ; ZACAS-NEXT: 
amocas.w.aqrl a3, a2, (a0) ; ZACAS-NEXT: bne a3, a1, .LBB0_1 ; ZACAS-NEXT: # %bb.2: # %exit @@ -77,8 +77,8 @@ define void @cmpxchg_and_branch2(ptr %ptr, i32 signext %cmp, i32 signext %val) n ; ZACAS: # %bb.0: # %entry ; ZACAS-NEXT: .LBB1_1: # %do_cmpxchg ; ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 -; ZACAS-NEXT: fence rw, rw ; ZACAS-NEXT: mv a3, a1 +; ZACAS-NEXT: fence rw, rw ; ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0) ; ZACAS-NEXT: beq a3, a1, .LBB1_1 ; ZACAS-NEXT: # %bb.2: # %exit @@ -96,12 +96,12 @@ exit: define void @cmpxchg_masked_and_branch1(ptr %ptr, i8 signext %cmp, i8 signext %val) nounwind { ; RV32IA-LABEL: cmpxchg_masked_and_branch1: ; RV32IA: # %bb.0: # %entry -; RV32IA-NEXT: andi a3, a0, -4 ; RV32IA-NEXT: slli a4, a0, 3 -; RV32IA-NEXT: li a0, 255 +; RV32IA-NEXT: li a3, 255 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: sll a3, a3, a4 ; RV32IA-NEXT: zext.b a1, a1 ; RV32IA-NEXT: zext.b a2, a2 -; RV32IA-NEXT: sll a0, a0, a4 ; RV32IA-NEXT: sll a1, a1, a4 ; RV32IA-NEXT: sll a2, a2, a4 ; RV32IA-NEXT: .LBB2_1: # %do_cmpxchg @@ -110,15 +110,15 @@ define void @cmpxchg_masked_and_branch1(ptr %ptr, i8 signext %cmp, i8 signext %v ; RV32IA-NEXT: .LBB2_3: # %do_cmpxchg ; RV32IA-NEXT: # Parent Loop BB2_1 Depth=1 ; RV32IA-NEXT: # => This Inner Loop Header: Depth=2 -; RV32IA-NEXT: lr.w.aqrl a4, (a3) -; RV32IA-NEXT: and a5, a4, a0 +; RV32IA-NEXT: lr.w.aqrl a4, (a0) +; RV32IA-NEXT: and a5, a4, a3 ; RV32IA-NEXT: bne a5, a1, .LBB2_1 ; RV32IA-NEXT: # %bb.4: # %do_cmpxchg ; RV32IA-NEXT: # in Loop: Header=BB2_3 Depth=2 ; RV32IA-NEXT: xor a5, a4, a2 -; RV32IA-NEXT: and a5, a5, a0 +; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB2_3 ; RV32IA-NEXT: # %bb.5: # %do_cmpxchg ; RV32IA-NEXT: # %bb.2: # %exit @@ -126,12 +126,12 @@ define void @cmpxchg_masked_and_branch1(ptr %ptr, i8 signext %cmp, i8 signext %v ; ; RV32IA-ZACAS-LABEL: cmpxchg_masked_and_branch1: ; 
RV32IA-ZACAS: # %bb.0: # %entry -; RV32IA-ZACAS-NEXT: andi a3, a0, -4 ; RV32IA-ZACAS-NEXT: slli a4, a0, 3 -; RV32IA-ZACAS-NEXT: li a0, 255 +; RV32IA-ZACAS-NEXT: li a3, 255 +; RV32IA-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-ZACAS-NEXT: sll a3, a3, a4 ; RV32IA-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-ZACAS-NEXT: zext.b a2, a2 -; RV32IA-ZACAS-NEXT: sll a0, a0, a4 ; RV32IA-ZACAS-NEXT: sll a1, a1, a4 ; RV32IA-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-ZACAS-NEXT: .LBB2_1: # %do_cmpxchg @@ -140,15 +140,15 @@ define void @cmpxchg_masked_and_branch1(ptr %ptr, i8 signext %cmp, i8 signext %v ; RV32IA-ZACAS-NEXT: .LBB2_3: # %do_cmpxchg ; RV32IA-ZACAS-NEXT: # Parent Loop BB2_1 Depth=1 ; RV32IA-ZACAS-NEXT: # => This Inner Loop Header: Depth=2 -; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a3) -; RV32IA-ZACAS-NEXT: and a5, a4, a0 +; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a0) +; RV32IA-ZACAS-NEXT: and a5, a4, a3 ; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB2_1 ; RV32IA-ZACAS-NEXT: # %bb.4: # %do_cmpxchg ; RV32IA-ZACAS-NEXT: # in Loop: Header=BB2_3 Depth=2 ; RV32IA-ZACAS-NEXT: xor a5, a4, a2 -; RV32IA-ZACAS-NEXT: and a5, a5, a0 +; RV32IA-ZACAS-NEXT: and a5, a5, a3 ; RV32IA-ZACAS-NEXT: xor a5, a4, a5 -; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-ZACAS-NEXT: bnez a5, .LBB2_3 ; RV32IA-ZACAS-NEXT: # %bb.5: # %do_cmpxchg ; RV32IA-ZACAS-NEXT: # %bb.2: # %exit @@ -156,12 +156,12 @@ define void @cmpxchg_masked_and_branch1(ptr %ptr, i8 signext %cmp, i8 signext %v ; ; RV64IA-LABEL: cmpxchg_masked_and_branch1: ; RV64IA: # %bb.0: # %entry -; RV64IA-NEXT: andi a3, a0, -4 ; RV64IA-NEXT: slli a4, a0, 3 -; RV64IA-NEXT: li a0, 255 +; RV64IA-NEXT: li a3, 255 +; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: sllw a3, a3, a4 ; RV64IA-NEXT: zext.b a1, a1 ; RV64IA-NEXT: zext.b a2, a2 -; RV64IA-NEXT: sllw a0, a0, a4 ; RV64IA-NEXT: sllw a1, a1, a4 ; RV64IA-NEXT: sllw a2, a2, a4 ; RV64IA-NEXT: .LBB2_1: # %do_cmpxchg @@ -170,15 +170,15 @@ define void @cmpxchg_masked_and_branch1(ptr %ptr, i8 signext 
%cmp, i8 signext %v ; RV64IA-NEXT: .LBB2_3: # %do_cmpxchg ; RV64IA-NEXT: # Parent Loop BB2_1 Depth=1 ; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 -; RV64IA-NEXT: lr.w.aqrl a4, (a3) -; RV64IA-NEXT: and a5, a4, a0 +; RV64IA-NEXT: lr.w.aqrl a4, (a0) +; RV64IA-NEXT: and a5, a4, a3 ; RV64IA-NEXT: bne a5, a1, .LBB2_1 ; RV64IA-NEXT: # %bb.4: # %do_cmpxchg ; RV64IA-NEXT: # in Loop: Header=BB2_3 Depth=2 ; RV64IA-NEXT: xor a5, a4, a2 -; RV64IA-NEXT: and a5, a5, a0 +; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB2_3 ; RV64IA-NEXT: # %bb.5: # %do_cmpxchg ; RV64IA-NEXT: # %bb.2: # %exit @@ -186,12 +186,12 @@ define void @cmpxchg_masked_and_branch1(ptr %ptr, i8 signext %cmp, i8 signext %v ; ; RV64IA-ZACAS-LABEL: cmpxchg_masked_and_branch1: ; RV64IA-ZACAS: # %bb.0: # %entry -; RV64IA-ZACAS-NEXT: andi a3, a0, -4 ; RV64IA-ZACAS-NEXT: slli a4, a0, 3 -; RV64IA-ZACAS-NEXT: li a0, 255 +; RV64IA-ZACAS-NEXT: li a3, 255 +; RV64IA-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-ZACAS-NEXT: sllw a3, a3, a4 ; RV64IA-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-ZACAS-NEXT: zext.b a2, a2 -; RV64IA-ZACAS-NEXT: sllw a0, a0, a4 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a4 ; RV64IA-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-ZACAS-NEXT: .LBB2_1: # %do_cmpxchg @@ -200,15 +200,15 @@ define void @cmpxchg_masked_and_branch1(ptr %ptr, i8 signext %cmp, i8 signext %v ; RV64IA-ZACAS-NEXT: .LBB2_3: # %do_cmpxchg ; RV64IA-ZACAS-NEXT: # Parent Loop BB2_1 Depth=1 ; RV64IA-ZACAS-NEXT: # => This Inner Loop Header: Depth=2 -; RV64IA-ZACAS-NEXT: lr.w.aqrl a4, (a3) -; RV64IA-ZACAS-NEXT: and a5, a4, a0 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a4, (a0) +; RV64IA-ZACAS-NEXT: and a5, a4, a3 ; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB2_1 ; RV64IA-ZACAS-NEXT: # %bb.4: # %do_cmpxchg ; RV64IA-ZACAS-NEXT: # in Loop: Header=BB2_3 Depth=2 ; RV64IA-ZACAS-NEXT: xor a5, a4, a2 -; RV64IA-ZACAS-NEXT: and a5, a5, a0 +; RV64IA-ZACAS-NEXT: and a5, a5, a3 ; 
RV64IA-ZACAS-NEXT: xor a5, a4, a5 -; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-ZACAS-NEXT: bnez a5, .LBB2_3 ; RV64IA-ZACAS-NEXT: # %bb.5: # %do_cmpxchg ; RV64IA-ZACAS-NEXT: # %bb.2: # %exit @@ -218,8 +218,8 @@ define void @cmpxchg_masked_and_branch1(ptr %ptr, i8 signext %cmp, i8 signext %v ; RV64IA-ZABHA: # %bb.0: # %entry ; RV64IA-ZABHA-NEXT: .LBB2_1: # %do_cmpxchg ; RV64IA-ZABHA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZABHA-NEXT: fence rw, rw ; RV64IA-ZABHA-NEXT: mv a3, a1 +; RV64IA-ZABHA-NEXT: fence rw, rw ; RV64IA-ZABHA-NEXT: amocas.b.aqrl a3, a2, (a0) ; RV64IA-ZABHA-NEXT: bne a3, a1, .LBB2_1 ; RV64IA-ZABHA-NEXT: # %bb.2: # %exit @@ -237,12 +237,12 @@ exit: define void @cmpxchg_masked_and_branch2(ptr %ptr, i8 signext %cmp, i8 signext %val) nounwind { ; RV32IA-LABEL: cmpxchg_masked_and_branch2: ; RV32IA: # %bb.0: # %entry -; RV32IA-NEXT: andi a3, a0, -4 ; RV32IA-NEXT: slli a4, a0, 3 -; RV32IA-NEXT: li a0, 255 +; RV32IA-NEXT: li a3, 255 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: sll a3, a3, a4 ; RV32IA-NEXT: zext.b a1, a1 ; RV32IA-NEXT: zext.b a2, a2 -; RV32IA-NEXT: sll a0, a0, a4 ; RV32IA-NEXT: sll a1, a1, a4 ; RV32IA-NEXT: sll a2, a2, a4 ; RV32IA-NEXT: .LBB3_1: # %do_cmpxchg @@ -251,31 +251,31 @@ define void @cmpxchg_masked_and_branch2(ptr %ptr, i8 signext %cmp, i8 signext %v ; RV32IA-NEXT: .LBB3_3: # %do_cmpxchg ; RV32IA-NEXT: # Parent Loop BB3_1 Depth=1 ; RV32IA-NEXT: # => This Inner Loop Header: Depth=2 -; RV32IA-NEXT: lr.w.aqrl a4, (a3) -; RV32IA-NEXT: and a5, a4, a0 +; RV32IA-NEXT: lr.w.aqrl a4, (a0) +; RV32IA-NEXT: and a5, a4, a3 ; RV32IA-NEXT: bne a5, a1, .LBB3_5 ; RV32IA-NEXT: # %bb.4: # %do_cmpxchg ; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=2 ; RV32IA-NEXT: xor a5, a4, a2 -; RV32IA-NEXT: and a5, a5, a0 +; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB3_3 ; RV32IA-NEXT: 
.LBB3_5: # %do_cmpxchg ; RV32IA-NEXT: # in Loop: Header=BB3_1 Depth=1 -; RV32IA-NEXT: and a4, a4, a0 +; RV32IA-NEXT: and a4, a4, a3 ; RV32IA-NEXT: beq a1, a4, .LBB3_1 ; RV32IA-NEXT: # %bb.2: # %exit ; RV32IA-NEXT: ret ; ; RV32IA-ZACAS-LABEL: cmpxchg_masked_and_branch2: ; RV32IA-ZACAS: # %bb.0: # %entry -; RV32IA-ZACAS-NEXT: andi a3, a0, -4 ; RV32IA-ZACAS-NEXT: slli a4, a0, 3 -; RV32IA-ZACAS-NEXT: li a0, 255 +; RV32IA-ZACAS-NEXT: li a3, 255 +; RV32IA-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-ZACAS-NEXT: sll a3, a3, a4 ; RV32IA-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-ZACAS-NEXT: zext.b a2, a2 -; RV32IA-ZACAS-NEXT: sll a0, a0, a4 ; RV32IA-ZACAS-NEXT: sll a1, a1, a4 ; RV32IA-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-ZACAS-NEXT: .LBB3_1: # %do_cmpxchg @@ -284,31 +284,31 @@ define void @cmpxchg_masked_and_branch2(ptr %ptr, i8 signext %cmp, i8 signext %v ; RV32IA-ZACAS-NEXT: .LBB3_3: # %do_cmpxchg ; RV32IA-ZACAS-NEXT: # Parent Loop BB3_1 Depth=1 ; RV32IA-ZACAS-NEXT: # => This Inner Loop Header: Depth=2 -; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a3) -; RV32IA-ZACAS-NEXT: and a5, a4, a0 +; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a0) +; RV32IA-ZACAS-NEXT: and a5, a4, a3 ; RV32IA-ZACAS-NEXT: bne a5, a1, .LBB3_5 ; RV32IA-ZACAS-NEXT: # %bb.4: # %do_cmpxchg ; RV32IA-ZACAS-NEXT: # in Loop: Header=BB3_3 Depth=2 ; RV32IA-ZACAS-NEXT: xor a5, a4, a2 -; RV32IA-ZACAS-NEXT: and a5, a5, a0 +; RV32IA-ZACAS-NEXT: and a5, a5, a3 ; RV32IA-ZACAS-NEXT: xor a5, a4, a5 -; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-ZACAS-NEXT: bnez a5, .LBB3_3 ; RV32IA-ZACAS-NEXT: .LBB3_5: # %do_cmpxchg ; RV32IA-ZACAS-NEXT: # in Loop: Header=BB3_1 Depth=1 -; RV32IA-ZACAS-NEXT: and a4, a4, a0 +; RV32IA-ZACAS-NEXT: and a4, a4, a3 ; RV32IA-ZACAS-NEXT: beq a1, a4, .LBB3_1 ; RV32IA-ZACAS-NEXT: # %bb.2: # %exit ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-LABEL: cmpxchg_masked_and_branch2: ; RV64IA: # %bb.0: # %entry -; RV64IA-NEXT: andi a3, a0, -4 ; RV64IA-NEXT: slli a4, a0, 3 -; RV64IA-NEXT: li a0, 255 
+; RV64IA-NEXT: li a3, 255 +; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: sllw a3, a3, a4 ; RV64IA-NEXT: zext.b a1, a1 ; RV64IA-NEXT: zext.b a2, a2 -; RV64IA-NEXT: sllw a0, a0, a4 ; RV64IA-NEXT: sllw a1, a1, a4 ; RV64IA-NEXT: sllw a2, a2, a4 ; RV64IA-NEXT: .LBB3_1: # %do_cmpxchg @@ -317,31 +317,31 @@ define void @cmpxchg_masked_and_branch2(ptr %ptr, i8 signext %cmp, i8 signext %v ; RV64IA-NEXT: .LBB3_3: # %do_cmpxchg ; RV64IA-NEXT: # Parent Loop BB3_1 Depth=1 ; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 -; RV64IA-NEXT: lr.w.aqrl a4, (a3) -; RV64IA-NEXT: and a5, a4, a0 +; RV64IA-NEXT: lr.w.aqrl a4, (a0) +; RV64IA-NEXT: and a5, a4, a3 ; RV64IA-NEXT: bne a5, a1, .LBB3_5 ; RV64IA-NEXT: # %bb.4: # %do_cmpxchg ; RV64IA-NEXT: # in Loop: Header=BB3_3 Depth=2 ; RV64IA-NEXT: xor a5, a4, a2 -; RV64IA-NEXT: and a5, a5, a0 +; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 -; RV64IA-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB3_3 ; RV64IA-NEXT: .LBB3_5: # %do_cmpxchg ; RV64IA-NEXT: # in Loop: Header=BB3_1 Depth=1 -; RV64IA-NEXT: and a4, a4, a0 +; RV64IA-NEXT: and a4, a4, a3 ; RV64IA-NEXT: beq a1, a4, .LBB3_1 ; RV64IA-NEXT: # %bb.2: # %exit ; RV64IA-NEXT: ret ; ; RV64IA-ZACAS-LABEL: cmpxchg_masked_and_branch2: ; RV64IA-ZACAS: # %bb.0: # %entry -; RV64IA-ZACAS-NEXT: andi a3, a0, -4 ; RV64IA-ZACAS-NEXT: slli a4, a0, 3 -; RV64IA-ZACAS-NEXT: li a0, 255 +; RV64IA-ZACAS-NEXT: li a3, 255 +; RV64IA-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-ZACAS-NEXT: sllw a3, a3, a4 ; RV64IA-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-ZACAS-NEXT: zext.b a2, a2 -; RV64IA-ZACAS-NEXT: sllw a0, a0, a4 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a4 ; RV64IA-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-ZACAS-NEXT: .LBB3_1: # %do_cmpxchg @@ -350,19 +350,19 @@ define void @cmpxchg_masked_and_branch2(ptr %ptr, i8 signext %cmp, i8 signext %v ; RV64IA-ZACAS-NEXT: .LBB3_3: # %do_cmpxchg ; RV64IA-ZACAS-NEXT: # Parent Loop BB3_1 Depth=1 ; RV64IA-ZACAS-NEXT: # => This Inner Loop 
Header: Depth=2 -; RV64IA-ZACAS-NEXT: lr.w.aqrl a4, (a3) -; RV64IA-ZACAS-NEXT: and a5, a4, a0 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a4, (a0) +; RV64IA-ZACAS-NEXT: and a5, a4, a3 ; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB3_5 ; RV64IA-ZACAS-NEXT: # %bb.4: # %do_cmpxchg ; RV64IA-ZACAS-NEXT: # in Loop: Header=BB3_3 Depth=2 ; RV64IA-ZACAS-NEXT: xor a5, a4, a2 -; RV64IA-ZACAS-NEXT: and a5, a5, a0 +; RV64IA-ZACAS-NEXT: and a5, a5, a3 ; RV64IA-ZACAS-NEXT: xor a5, a4, a5 -; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-ZACAS-NEXT: bnez a5, .LBB3_3 ; RV64IA-ZACAS-NEXT: .LBB3_5: # %do_cmpxchg ; RV64IA-ZACAS-NEXT: # in Loop: Header=BB3_1 Depth=1 -; RV64IA-ZACAS-NEXT: and a4, a4, a0 +; RV64IA-ZACAS-NEXT: and a4, a4, a3 ; RV64IA-ZACAS-NEXT: beq a1, a4, .LBB3_1 ; RV64IA-ZACAS-NEXT: # %bb.2: # %exit ; RV64IA-ZACAS-NEXT: ret @@ -371,8 +371,8 @@ define void @cmpxchg_masked_and_branch2(ptr %ptr, i8 signext %cmp, i8 signext %v ; RV64IA-ZABHA: # %bb.0: # %entry ; RV64IA-ZABHA-NEXT: .LBB3_1: # %do_cmpxchg ; RV64IA-ZABHA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZABHA-NEXT: fence rw, rw ; RV64IA-ZABHA-NEXT: mv a3, a1 +; RV64IA-ZABHA-NEXT: fence rw, rw ; RV64IA-ZABHA-NEXT: amocas.b.aqrl a3, a2, (a0) ; RV64IA-ZABHA-NEXT: beq a3, a1, .LBB3_1 ; RV64IA-ZABHA-NEXT: # %bb.2: # %exit @@ -412,8 +412,8 @@ define void @cmpxchg_and_irrelevant_branch(ptr %ptr, i32 signext %cmp, i32 signe ; ZACAS: # %bb.0: # %entry ; ZACAS-NEXT: .LBB4_1: # %do_cmpxchg ; ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 -; ZACAS-NEXT: fence rw, rw ; ZACAS-NEXT: mv a4, a1 +; ZACAS-NEXT: fence rw, rw ; ZACAS-NEXT: amocas.w.aqrl a4, a2, (a0) ; ZACAS-NEXT: beqz a3, .LBB4_1 ; ZACAS-NEXT: # %bb.2: # %exit diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll index 12f057ce4ccd3..1a0194c66458f 100644 --- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll +++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll @@ -40,23 +40,23 @@ define void 
@cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind ; ; RV32IA-LABEL: cmpxchg_i8_monotonic_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 +; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: li a4, 255 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: sll a4, a4, a3 ; RV32IA-NEXT: zext.b a1, a1 ; RV32IA-NEXT: zext.b a2, a2 -; RV32IA-NEXT: sll a4, a4, a0 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: sll a0, a2, a0 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a2, (a3) -; RV32IA-NEXT: and a5, a2, a4 +; RV32IA-NEXT: lr.w a3, (a0) +; RV32IA-NEXT: and a5, a3, a4 ; RV32IA-NEXT: bne a5, a1, .LBB0_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 -; RV32IA-NEXT: xor a5, a2, a0 +; RV32IA-NEXT: xor a5, a3, a2 ; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a2, a5 -; RV32IA-NEXT: sc.w a5, a5, (a3) +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: sc.w a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB0_1 ; RV32IA-NEXT: .LBB0_3: ; RV32IA-NEXT: ret @@ -76,46 +76,46 @@ define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind ; ; RV64IA-WMO-LABEL: cmpxchg_i8_monotonic_monotonic: ; RV64IA-WMO: # %bb.0: -; RV64IA-WMO-NEXT: andi a3, a0, -4 -; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: slli a3, a0, 3 ; RV64IA-WMO-NEXT: li a4, 255 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a4, a4, a3 ; RV64IA-WMO-NEXT: zext.b a1, a1 ; RV64IA-WMO-NEXT: zext.b a2, a2 -; RV64IA-WMO-NEXT: sllw a4, a4, a0 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a2, a2, a3 ; RV64IA-WMO-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w a2, (a3) -; RV64IA-WMO-NEXT: and a5, a2, a4 +; RV64IA-WMO-NEXT: lr.w a3, (a0) +; RV64IA-WMO-NEXT: and a5, a3, a4 ; RV64IA-WMO-NEXT: bne a5, a1, .LBB0_3 ; RV64IA-WMO-NEXT: # 
%bb.2: # in Loop: Header=BB0_1 Depth=1 -; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: xor a5, a3, a2 ; RV64IA-WMO-NEXT: and a5, a5, a4 -; RV64IA-WMO-NEXT: xor a5, a2, a5 -; RV64IA-WMO-NEXT: sc.w a5, a5, (a3) +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: sc.w a5, a5, (a0) ; RV64IA-WMO-NEXT: bnez a5, .LBB0_1 ; RV64IA-WMO-NEXT: .LBB0_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-ZACAS-LABEL: cmpxchg_i8_monotonic_monotonic: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: slli a3, a0, 3 ; RV64IA-ZACAS-NEXT: li a4, 255 +; RV64IA-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-ZACAS-NEXT: sllw a4, a4, a3 ; RV64IA-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-ZACAS-NEXT: zext.b a2, a2 -; RV64IA-ZACAS-NEXT: sllw a4, a4, a0 -; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a3 ; RV64IA-ZACAS-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w a2, (a3) -; RV64IA-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-ZACAS-NEXT: lr.w a3, (a0) +; RV64IA-ZACAS-NEXT: and a5, a3, a4 ; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB0_3 ; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 -; RV64IA-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-ZACAS-NEXT: xor a5, a3, a2 ; RV64IA-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-ZACAS-NEXT: xor a5, a2, a5 -; RV64IA-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV64IA-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-ZACAS-NEXT: sc.w a5, a5, (a0) ; RV64IA-ZACAS-NEXT: bnez a5, .LBB0_1 ; RV64IA-ZACAS-NEXT: .LBB0_3: ; RV64IA-ZACAS-NEXT: ret @@ -127,23 +127,23 @@ define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind ; ; RV64IA-TSO-LABEL: cmpxchg_i8_monotonic_monotonic: ; RV64IA-TSO: # %bb.0: -; RV64IA-TSO-NEXT: andi a3, a0, -4 -; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: slli a3, a0, 3 ; RV64IA-TSO-NEXT: li a4, 255 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a4, a4, a3 ; 
RV64IA-TSO-NEXT: zext.b a1, a1 ; RV64IA-TSO-NEXT: zext.b a2, a2 -; RV64IA-TSO-NEXT: sllw a4, a4, a0 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a2, a2, a3 ; RV64IA-TSO-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w a2, (a3) -; RV64IA-TSO-NEXT: and a5, a2, a4 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: and a5, a3, a4 ; RV64IA-TSO-NEXT: bne a5, a1, .LBB0_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 -; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: xor a5, a3, a2 ; RV64IA-TSO-NEXT: and a5, a5, a4 -; RV64IA-TSO-NEXT: xor a5, a2, a5 -; RV64IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a0) ; RV64IA-TSO-NEXT: bnez a5, .LBB0_1 ; RV64IA-TSO-NEXT: .LBB0_3: ; RV64IA-TSO-NEXT: ret @@ -167,92 +167,92 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV32IA-WMO-LABEL: cmpxchg_i8_acquire_monotonic: ; RV32IA-WMO: # %bb.0: -; RV32IA-WMO-NEXT: andi a3, a0, -4 -; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: slli a3, a0, 3 ; RV32IA-WMO-NEXT: li a4, 255 +; RV32IA-WMO-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NEXT: sll a4, a4, a3 ; RV32IA-WMO-NEXT: zext.b a1, a1 ; RV32IA-WMO-NEXT: zext.b a2, a2 -; RV32IA-WMO-NEXT: sll a4, a4, a0 -; RV32IA-WMO-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: sll a1, a1, a3 +; RV32IA-WMO-NEXT: sll a2, a2, a3 ; RV32IA-WMO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NEXT: lr.w.aq a2, (a3) -; RV32IA-WMO-NEXT: and a5, a2, a4 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: and a5, a3, a4 ; RV32IA-WMO-NEXT: bne a5, a1, .LBB1_3 ; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 -; RV32IA-WMO-NEXT: xor a5, a2, a0 +; RV32IA-WMO-NEXT: xor a5, a3, a2 ; RV32IA-WMO-NEXT: and a5, a5, a4 -; RV32IA-WMO-NEXT: xor a5, a2, a5 -; RV32IA-WMO-NEXT: sc.w a5, a5, (a3) +; 
RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: sc.w a5, a5, (a0) ; RV32IA-WMO-NEXT: bnez a5, .LBB1_1 ; RV32IA-WMO-NEXT: .LBB1_3: ; RV32IA-WMO-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a3, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: li a4, 255 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a3 ; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a0 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a3 ; RV32IA-WMO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) -; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-ZACAS-NEXT: and a5, a3, a4 ; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB1_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a2 ; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5 -; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a0) ; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB1_1 ; RV32IA-WMO-ZACAS-NEXT: .LBB1_3: ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-LABEL: cmpxchg_i8_acquire_monotonic: ; RV32IA-TSO: # %bb.0: -; RV32IA-TSO-NEXT: andi a3, a0, -4 -; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: slli a3, a0, 3 ; RV32IA-TSO-NEXT: li a4, 255 +; RV32IA-TSO-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NEXT: sll a4, a4, a3 ; RV32IA-TSO-NEXT: zext.b a1, a1 ; RV32IA-TSO-NEXT: zext.b a2, a2 -; RV32IA-TSO-NEXT: sll a4, a4, a0 -; RV32IA-TSO-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: sll a1, 
a1, a3 +; RV32IA-TSO-NEXT: sll a2, a2, a3 ; RV32IA-TSO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NEXT: lr.w a2, (a3) -; RV32IA-TSO-NEXT: and a5, a2, a4 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: and a5, a3, a4 ; RV32IA-TSO-NEXT: bne a5, a1, .LBB1_3 ; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 -; RV32IA-TSO-NEXT: xor a5, a2, a0 +; RV32IA-TSO-NEXT: xor a5, a3, a2 ; RV32IA-TSO-NEXT: and a5, a5, a4 -; RV32IA-TSO-NEXT: xor a5, a2, a5 -; RV32IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a0) ; RV32IA-TSO-NEXT: bnez a5, .LBB1_1 ; RV32IA-TSO-NEXT: .LBB1_3: ; RV32IA-TSO-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a3, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: li a4, 255 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a4, a4, a3 ; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a4, a0 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a3 ; RV32IA-TSO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a3) -; RV32IA-TSO-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV32IA-TSO-ZACAS-NEXT: and a5, a3, a4 ; RV32IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB1_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a2 ; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a5 -; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a0) ; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB1_1 ; RV32IA-TSO-ZACAS-NEXT: 
.LBB1_3: ; RV32IA-TSO-ZACAS-NEXT: ret @@ -272,46 +272,46 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV64IA-WMO-LABEL: cmpxchg_i8_acquire_monotonic: ; RV64IA-WMO: # %bb.0: -; RV64IA-WMO-NEXT: andi a3, a0, -4 -; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: slli a3, a0, 3 ; RV64IA-WMO-NEXT: li a4, 255 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a4, a4, a3 ; RV64IA-WMO-NEXT: zext.b a1, a1 ; RV64IA-WMO-NEXT: zext.b a2, a2 -; RV64IA-WMO-NEXT: sllw a4, a4, a0 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a2, a2, a3 ; RV64IA-WMO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w.aq a2, (a3) -; RV64IA-WMO-NEXT: and a5, a2, a4 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: and a5, a3, a4 ; RV64IA-WMO-NEXT: bne a5, a1, .LBB1_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 -; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: xor a5, a3, a2 ; RV64IA-WMO-NEXT: and a5, a5, a4 -; RV64IA-WMO-NEXT: xor a5, a2, a5 -; RV64IA-WMO-NEXT: sc.w a5, a5, (a3) +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: sc.w a5, a5, (a0) ; RV64IA-WMO-NEXT: bnez a5, .LBB1_1 ; RV64IA-WMO-NEXT: .LBB1_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a3, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: li a4, 255 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a3 ; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a0 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a3 ; RV64IA-WMO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: 
Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) -; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-ZACAS-NEXT: and a5, a3, a4 ; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB1_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a2 ; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5 -; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a0) ; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB1_1 ; RV64IA-WMO-ZACAS-NEXT: .LBB1_3: ; RV64IA-WMO-ZACAS-NEXT: ret @@ -323,46 +323,46 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV64IA-TSO-LABEL: cmpxchg_i8_acquire_monotonic: ; RV64IA-TSO: # %bb.0: -; RV64IA-TSO-NEXT: andi a3, a0, -4 -; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: slli a3, a0, 3 ; RV64IA-TSO-NEXT: li a4, 255 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a4, a4, a3 ; RV64IA-TSO-NEXT: zext.b a1, a1 ; RV64IA-TSO-NEXT: zext.b a2, a2 -; RV64IA-TSO-NEXT: sllw a4, a4, a0 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a2, a2, a3 ; RV64IA-TSO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w a2, (a3) -; RV64IA-TSO-NEXT: and a5, a2, a4 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: and a5, a3, a4 ; RV64IA-TSO-NEXT: bne a5, a1, .LBB1_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 -; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: xor a5, a3, a2 ; RV64IA-TSO-NEXT: and a5, a5, a4 -; RV64IA-TSO-NEXT: xor a5, a2, a5 -; RV64IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a0) ; RV64IA-TSO-NEXT: bnez a5, .LBB1_1 ; RV64IA-TSO-NEXT: .LBB1_3: ; RV64IA-TSO-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: 
cmpxchg_i8_acquire_monotonic: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: slli a3, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: li a4, 255 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a4, a4, a3 ; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a4, a0 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a3 ; RV64IA-TSO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a3) -; RV64IA-TSO-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV64IA-TSO-ZACAS-NEXT: and a5, a3, a4 ; RV64IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB1_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a2 ; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a5 -; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a0) ; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB1_1 ; RV64IA-TSO-ZACAS-NEXT: .LBB1_3: ; RV64IA-TSO-ZACAS-NEXT: ret @@ -391,92 +391,92 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV32IA-WMO-LABEL: cmpxchg_i8_acquire_acquire: ; RV32IA-WMO: # %bb.0: -; RV32IA-WMO-NEXT: andi a3, a0, -4 -; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: slli a3, a0, 3 ; RV32IA-WMO-NEXT: li a4, 255 +; RV32IA-WMO-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NEXT: sll a4, a4, a3 ; RV32IA-WMO-NEXT: zext.b a1, a1 ; RV32IA-WMO-NEXT: zext.b a2, a2 -; RV32IA-WMO-NEXT: sll a4, a4, a0 -; RV32IA-WMO-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: sll a1, a1, a3 +; RV32IA-WMO-NEXT: sll a2, a2, a3 ; RV32IA-WMO-NEXT: .LBB2_1: # =>This Inner 
Loop Header: Depth=1 -; RV32IA-WMO-NEXT: lr.w.aq a2, (a3) -; RV32IA-WMO-NEXT: and a5, a2, a4 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: and a5, a3, a4 ; RV32IA-WMO-NEXT: bne a5, a1, .LBB2_3 ; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 -; RV32IA-WMO-NEXT: xor a5, a2, a0 +; RV32IA-WMO-NEXT: xor a5, a3, a2 ; RV32IA-WMO-NEXT: and a5, a5, a4 -; RV32IA-WMO-NEXT: xor a5, a2, a5 -; RV32IA-WMO-NEXT: sc.w a5, a5, (a3) +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: sc.w a5, a5, (a0) ; RV32IA-WMO-NEXT: bnez a5, .LBB2_1 ; RV32IA-WMO-NEXT: .LBB2_3: ; RV32IA-WMO-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_acquire: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a3, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: li a4, 255 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a3 ; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a0 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a3 ; RV32IA-WMO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) -; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-ZACAS-NEXT: and a5, a3, a4 ; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB2_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a2 ; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5 -; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a0) ; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB2_1 ; RV32IA-WMO-ZACAS-NEXT: .LBB2_3: ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-LABEL: 
cmpxchg_i8_acquire_acquire: ; RV32IA-TSO: # %bb.0: -; RV32IA-TSO-NEXT: andi a3, a0, -4 -; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: slli a3, a0, 3 ; RV32IA-TSO-NEXT: li a4, 255 +; RV32IA-TSO-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NEXT: sll a4, a4, a3 ; RV32IA-TSO-NEXT: zext.b a1, a1 ; RV32IA-TSO-NEXT: zext.b a2, a2 -; RV32IA-TSO-NEXT: sll a4, a4, a0 -; RV32IA-TSO-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: sll a1, a1, a3 +; RV32IA-TSO-NEXT: sll a2, a2, a3 ; RV32IA-TSO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NEXT: lr.w a2, (a3) -; RV32IA-TSO-NEXT: and a5, a2, a4 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: and a5, a3, a4 ; RV32IA-TSO-NEXT: bne a5, a1, .LBB2_3 ; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 -; RV32IA-TSO-NEXT: xor a5, a2, a0 +; RV32IA-TSO-NEXT: xor a5, a3, a2 ; RV32IA-TSO-NEXT: and a5, a5, a4 -; RV32IA-TSO-NEXT: xor a5, a2, a5 -; RV32IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a0) ; RV32IA-TSO-NEXT: bnez a5, .LBB2_1 ; RV32IA-TSO-NEXT: .LBB2_3: ; RV32IA-TSO-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i8_acquire_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a3, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: li a4, 255 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a4, a4, a3 ; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a4, a0 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a3 ; RV32IA-TSO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a3) -; RV32IA-TSO-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV32IA-TSO-ZACAS-NEXT: and a5, a3, a4 ; 
RV32IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB2_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a2 ; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a5 -; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a0) ; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB2_1 ; RV32IA-TSO-ZACAS-NEXT: .LBB2_3: ; RV32IA-TSO-ZACAS-NEXT: ret @@ -496,46 +496,46 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV64IA-WMO-LABEL: cmpxchg_i8_acquire_acquire: ; RV64IA-WMO: # %bb.0: -; RV64IA-WMO-NEXT: andi a3, a0, -4 -; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: slli a3, a0, 3 ; RV64IA-WMO-NEXT: li a4, 255 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a4, a4, a3 ; RV64IA-WMO-NEXT: zext.b a1, a1 ; RV64IA-WMO-NEXT: zext.b a2, a2 -; RV64IA-WMO-NEXT: sllw a4, a4, a0 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a2, a2, a3 ; RV64IA-WMO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w.aq a2, (a3) -; RV64IA-WMO-NEXT: and a5, a2, a4 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: and a5, a3, a4 ; RV64IA-WMO-NEXT: bne a5, a1, .LBB2_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 -; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: xor a5, a3, a2 ; RV64IA-WMO-NEXT: and a5, a5, a4 -; RV64IA-WMO-NEXT: xor a5, a2, a5 -; RV64IA-WMO-NEXT: sc.w a5, a5, (a3) +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: sc.w a5, a5, (a0) ; RV64IA-WMO-NEXT: bnez a5, .LBB2_1 ; RV64IA-WMO-NEXT: .LBB2_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_acquire: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a3, a0, 3 ; 
RV64IA-WMO-ZACAS-NEXT: li a4, 255 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a3 ; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a0 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a3 ; RV64IA-WMO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) -; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-ZACAS-NEXT: and a5, a3, a4 ; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB2_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a2 ; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5 -; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a0) ; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB2_1 ; RV64IA-WMO-ZACAS-NEXT: .LBB2_3: ; RV64IA-WMO-ZACAS-NEXT: ret @@ -547,46 +547,46 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV64IA-TSO-LABEL: cmpxchg_i8_acquire_acquire: ; RV64IA-TSO: # %bb.0: -; RV64IA-TSO-NEXT: andi a3, a0, -4 -; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: slli a3, a0, 3 ; RV64IA-TSO-NEXT: li a4, 255 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a4, a4, a3 ; RV64IA-TSO-NEXT: zext.b a1, a1 ; RV64IA-TSO-NEXT: zext.b a2, a2 -; RV64IA-TSO-NEXT: sllw a4, a4, a0 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a2, a2, a3 ; RV64IA-TSO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w a2, (a3) -; RV64IA-TSO-NEXT: and a5, a2, a4 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: and a5, a3, a4 ; 
RV64IA-TSO-NEXT: bne a5, a1, .LBB2_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 -; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: xor a5, a3, a2 ; RV64IA-TSO-NEXT: and a5, a5, a4 -; RV64IA-TSO-NEXT: xor a5, a2, a5 -; RV64IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a0) ; RV64IA-TSO-NEXT: bnez a5, .LBB2_1 ; RV64IA-TSO-NEXT: .LBB2_3: ; RV64IA-TSO-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i8_acquire_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: slli a3, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: li a4, 255 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a4, a4, a3 ; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a4, a0 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a3 ; RV64IA-TSO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a3) -; RV64IA-TSO-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV64IA-TSO-ZACAS-NEXT: and a5, a3, a4 ; RV64IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB2_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a2 ; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a5 -; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a0) ; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB2_1 ; RV64IA-TSO-ZACAS-NEXT: .LBB2_3: ; RV64IA-TSO-ZACAS-NEXT: ret @@ -615,92 +615,92 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV32IA-WMO-LABEL: cmpxchg_i8_release_monotonic: ; RV32IA-WMO: # %bb.0: -; RV32IA-WMO-NEXT: andi a3, 
a0, -4 -; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: slli a3, a0, 3 ; RV32IA-WMO-NEXT: li a4, 255 +; RV32IA-WMO-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NEXT: sll a4, a4, a3 ; RV32IA-WMO-NEXT: zext.b a1, a1 ; RV32IA-WMO-NEXT: zext.b a2, a2 -; RV32IA-WMO-NEXT: sll a4, a4, a0 -; RV32IA-WMO-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: sll a1, a1, a3 +; RV32IA-WMO-NEXT: sll a2, a2, a3 ; RV32IA-WMO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NEXT: lr.w a2, (a3) -; RV32IA-WMO-NEXT: and a5, a2, a4 +; RV32IA-WMO-NEXT: lr.w a3, (a0) +; RV32IA-WMO-NEXT: and a5, a3, a4 ; RV32IA-WMO-NEXT: bne a5, a1, .LBB3_3 ; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 -; RV32IA-WMO-NEXT: xor a5, a2, a0 +; RV32IA-WMO-NEXT: xor a5, a3, a2 ; RV32IA-WMO-NEXT: and a5, a5, a4 -; RV32IA-WMO-NEXT: xor a5, a2, a5 -; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-WMO-NEXT: bnez a5, .LBB3_1 ; RV32IA-WMO-NEXT: .LBB3_3: ; RV32IA-WMO-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_monotonic: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a3, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: li a4, 255 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a3 ; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a0 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a3 ; RV32IA-WMO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w a2, (a3) -; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: lr.w a3, (a0) +; RV32IA-WMO-ZACAS-NEXT: and a5, a3, a4 ; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB3_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: 
Header=BB3_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a2 ; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5 -; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB3_1 ; RV32IA-WMO-ZACAS-NEXT: .LBB3_3: ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-LABEL: cmpxchg_i8_release_monotonic: ; RV32IA-TSO: # %bb.0: -; RV32IA-TSO-NEXT: andi a3, a0, -4 -; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: slli a3, a0, 3 ; RV32IA-TSO-NEXT: li a4, 255 +; RV32IA-TSO-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NEXT: sll a4, a4, a3 ; RV32IA-TSO-NEXT: zext.b a1, a1 ; RV32IA-TSO-NEXT: zext.b a2, a2 -; RV32IA-TSO-NEXT: sll a4, a4, a0 -; RV32IA-TSO-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: sll a1, a1, a3 +; RV32IA-TSO-NEXT: sll a2, a2, a3 ; RV32IA-TSO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NEXT: lr.w a2, (a3) -; RV32IA-TSO-NEXT: and a5, a2, a4 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: and a5, a3, a4 ; RV32IA-TSO-NEXT: bne a5, a1, .LBB3_3 ; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 -; RV32IA-TSO-NEXT: xor a5, a2, a0 +; RV32IA-TSO-NEXT: xor a5, a3, a2 ; RV32IA-TSO-NEXT: and a5, a5, a4 -; RV32IA-TSO-NEXT: xor a5, a2, a5 -; RV32IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a0) ; RV32IA-TSO-NEXT: bnez a5, .LBB3_1 ; RV32IA-TSO-NEXT: .LBB3_3: ; RV32IA-TSO-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i8_release_monotonic: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a3, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: li a4, 255 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a4, a4, a3 ; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2 -; 
RV32IA-TSO-ZACAS-NEXT: sll a4, a4, a0 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a3 ; RV32IA-TSO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a3) -; RV32IA-TSO-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV32IA-TSO-ZACAS-NEXT: and a5, a3, a4 ; RV32IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB3_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a2 ; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a5 -; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a0) ; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB3_1 ; RV32IA-TSO-ZACAS-NEXT: .LBB3_3: ; RV32IA-TSO-ZACAS-NEXT: ret @@ -720,46 +720,46 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV64IA-WMO-LABEL: cmpxchg_i8_release_monotonic: ; RV64IA-WMO: # %bb.0: -; RV64IA-WMO-NEXT: andi a3, a0, -4 -; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: slli a3, a0, 3 ; RV64IA-WMO-NEXT: li a4, 255 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a4, a4, a3 ; RV64IA-WMO-NEXT: zext.b a1, a1 ; RV64IA-WMO-NEXT: zext.b a2, a2 -; RV64IA-WMO-NEXT: sllw a4, a4, a0 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a2, a2, a3 ; RV64IA-WMO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w a2, (a3) -; RV64IA-WMO-NEXT: and a5, a2, a4 +; RV64IA-WMO-NEXT: lr.w a3, (a0) +; RV64IA-WMO-NEXT: and a5, a3, a4 ; RV64IA-WMO-NEXT: bne a5, a1, .LBB3_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 -; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: xor a5, a3, a2 ; RV64IA-WMO-NEXT: and a5, a5, a4 -; RV64IA-WMO-NEXT: 
xor a5, a2, a5 -; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-WMO-NEXT: bnez a5, .LBB3_1 ; RV64IA-WMO-NEXT: .LBB3_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_monotonic: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a3, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: li a4, 255 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a3 ; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a0 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a3 ; RV64IA-WMO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w a2, (a3) -; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: lr.w a3, (a0) +; RV64IA-WMO-ZACAS-NEXT: and a5, a3, a4 ; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB3_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a2 ; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5 -; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB3_1 ; RV64IA-WMO-ZACAS-NEXT: .LBB3_3: ; RV64IA-WMO-ZACAS-NEXT: ret @@ -771,46 +771,46 @@ define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV64IA-TSO-LABEL: cmpxchg_i8_release_monotonic: ; RV64IA-TSO: # %bb.0: -; RV64IA-TSO-NEXT: andi a3, a0, -4 -; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: slli a3, a0, 3 ; RV64IA-TSO-NEXT: li a4, 255 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a4, a4, a3 ; RV64IA-TSO-NEXT: zext.b a1, 
a1 ; RV64IA-TSO-NEXT: zext.b a2, a2 -; RV64IA-TSO-NEXT: sllw a4, a4, a0 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a2, a2, a3 ; RV64IA-TSO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w a2, (a3) -; RV64IA-TSO-NEXT: and a5, a2, a4 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: and a5, a3, a4 ; RV64IA-TSO-NEXT: bne a5, a1, .LBB3_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 -; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: xor a5, a3, a2 ; RV64IA-TSO-NEXT: and a5, a5, a4 -; RV64IA-TSO-NEXT: xor a5, a2, a5 -; RV64IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a0) ; RV64IA-TSO-NEXT: bnez a5, .LBB3_1 ; RV64IA-TSO-NEXT: .LBB3_3: ; RV64IA-TSO-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i8_release_monotonic: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: slli a3, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: li a4, 255 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a4, a4, a3 ; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a4, a0 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a3 ; RV64IA-TSO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a3) -; RV64IA-TSO-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV64IA-TSO-ZACAS-NEXT: and a5, a3, a4 ; RV64IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB3_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a2 ; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a5 -; 
RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a0) ; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB3_1 ; RV64IA-TSO-ZACAS-NEXT: .LBB3_3: ; RV64IA-TSO-ZACAS-NEXT: ret @@ -839,92 +839,92 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV32IA-WMO-LABEL: cmpxchg_i8_release_acquire: ; RV32IA-WMO: # %bb.0: -; RV32IA-WMO-NEXT: andi a3, a0, -4 -; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: slli a3, a0, 3 ; RV32IA-WMO-NEXT: li a4, 255 +; RV32IA-WMO-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NEXT: sll a4, a4, a3 ; RV32IA-WMO-NEXT: zext.b a1, a1 ; RV32IA-WMO-NEXT: zext.b a2, a2 -; RV32IA-WMO-NEXT: sll a4, a4, a0 -; RV32IA-WMO-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: sll a1, a1, a3 +; RV32IA-WMO-NEXT: sll a2, a2, a3 ; RV32IA-WMO-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NEXT: lr.w.aq a2, (a3) -; RV32IA-WMO-NEXT: and a5, a2, a4 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: and a5, a3, a4 ; RV32IA-WMO-NEXT: bne a5, a1, .LBB4_3 ; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 -; RV32IA-WMO-NEXT: xor a5, a2, a0 +; RV32IA-WMO-NEXT: xor a5, a3, a2 ; RV32IA-WMO-NEXT: and a5, a5, a4 -; RV32IA-WMO-NEXT: xor a5, a2, a5 -; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-WMO-NEXT: bnez a5, .LBB4_1 ; RV32IA-WMO-NEXT: .LBB4_3: ; RV32IA-WMO-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_acquire: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a3, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: li a4, 255 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a3 ; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a0 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; 
RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a3 ; RV32IA-WMO-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) -; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-ZACAS-NEXT: and a5, a3, a4 ; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB4_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a2 ; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5 -; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB4_1 ; RV32IA-WMO-ZACAS-NEXT: .LBB4_3: ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-LABEL: cmpxchg_i8_release_acquire: ; RV32IA-TSO: # %bb.0: -; RV32IA-TSO-NEXT: andi a3, a0, -4 -; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: slli a3, a0, 3 ; RV32IA-TSO-NEXT: li a4, 255 +; RV32IA-TSO-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NEXT: sll a4, a4, a3 ; RV32IA-TSO-NEXT: zext.b a1, a1 ; RV32IA-TSO-NEXT: zext.b a2, a2 -; RV32IA-TSO-NEXT: sll a4, a4, a0 -; RV32IA-TSO-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: sll a1, a1, a3 +; RV32IA-TSO-NEXT: sll a2, a2, a3 ; RV32IA-TSO-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NEXT: lr.w a2, (a3) -; RV32IA-TSO-NEXT: and a5, a2, a4 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: and a5, a3, a4 ; RV32IA-TSO-NEXT: bne a5, a1, .LBB4_3 ; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 -; RV32IA-TSO-NEXT: xor a5, a2, a0 +; RV32IA-TSO-NEXT: xor a5, a3, a2 ; RV32IA-TSO-NEXT: and a5, a5, a4 -; RV32IA-TSO-NEXT: xor a5, a2, a5 -; RV32IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a0) ; RV32IA-TSO-NEXT: bnez a5, .LBB4_1 ; RV32IA-TSO-NEXT: 
.LBB4_3: ; RV32IA-TSO-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i8_release_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a3, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: li a4, 255 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a4, a4, a3 ; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a4, a0 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a3 ; RV32IA-TSO-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a3) -; RV32IA-TSO-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV32IA-TSO-ZACAS-NEXT: and a5, a3, a4 ; RV32IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB4_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a2 ; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a5 -; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a0) ; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB4_1 ; RV32IA-TSO-ZACAS-NEXT: .LBB4_3: ; RV32IA-TSO-ZACAS-NEXT: ret @@ -944,46 +944,46 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV64IA-WMO-LABEL: cmpxchg_i8_release_acquire: ; RV64IA-WMO: # %bb.0: -; RV64IA-WMO-NEXT: andi a3, a0, -4 -; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: slli a3, a0, 3 ; RV64IA-WMO-NEXT: li a4, 255 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a4, a4, a3 ; RV64IA-WMO-NEXT: zext.b a1, a1 ; RV64IA-WMO-NEXT: zext.b a2, a2 -; RV64IA-WMO-NEXT: sllw a4, a4, a0 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a3 +; RV64IA-WMO-NEXT: sllw 
a2, a2, a3 ; RV64IA-WMO-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w.aq a2, (a3) -; RV64IA-WMO-NEXT: and a5, a2, a4 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: and a5, a3, a4 ; RV64IA-WMO-NEXT: bne a5, a1, .LBB4_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 -; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: xor a5, a3, a2 ; RV64IA-WMO-NEXT: and a5, a5, a4 -; RV64IA-WMO-NEXT: xor a5, a2, a5 -; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-WMO-NEXT: bnez a5, .LBB4_1 ; RV64IA-WMO-NEXT: .LBB4_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_acquire: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a3, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: li a4, 255 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a3 ; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a0 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a3 ; RV64IA-WMO-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) -; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-ZACAS-NEXT: and a5, a3, a4 ; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB4_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a2 ; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5 -; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB4_1 ; RV64IA-WMO-ZACAS-NEXT: 
.LBB4_3: ; RV64IA-WMO-ZACAS-NEXT: ret @@ -995,46 +995,46 @@ define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV64IA-TSO-LABEL: cmpxchg_i8_release_acquire: ; RV64IA-TSO: # %bb.0: -; RV64IA-TSO-NEXT: andi a3, a0, -4 -; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: slli a3, a0, 3 ; RV64IA-TSO-NEXT: li a4, 255 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a4, a4, a3 ; RV64IA-TSO-NEXT: zext.b a1, a1 ; RV64IA-TSO-NEXT: zext.b a2, a2 -; RV64IA-TSO-NEXT: sllw a4, a4, a0 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a2, a2, a3 ; RV64IA-TSO-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w a2, (a3) -; RV64IA-TSO-NEXT: and a5, a2, a4 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: and a5, a3, a4 ; RV64IA-TSO-NEXT: bne a5, a1, .LBB4_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 -; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: xor a5, a3, a2 ; RV64IA-TSO-NEXT: and a5, a5, a4 -; RV64IA-TSO-NEXT: xor a5, a2, a5 -; RV64IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a0) ; RV64IA-TSO-NEXT: bnez a5, .LBB4_1 ; RV64IA-TSO-NEXT: .LBB4_3: ; RV64IA-TSO-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i8_release_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: slli a3, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: li a4, 255 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a4, a4, a3 ; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a4, a0 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a3 ; RV64IA-TSO-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 -; 
RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a3) -; RV64IA-TSO-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV64IA-TSO-ZACAS-NEXT: and a5, a3, a4 ; RV64IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB4_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a2 ; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a5 -; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a0) ; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB4_1 ; RV64IA-TSO-ZACAS-NEXT: .LBB4_3: ; RV64IA-TSO-ZACAS-NEXT: ret @@ -1063,92 +1063,92 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV32IA-WMO-LABEL: cmpxchg_i8_acq_rel_monotonic: ; RV32IA-WMO: # %bb.0: -; RV32IA-WMO-NEXT: andi a3, a0, -4 -; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: slli a3, a0, 3 ; RV32IA-WMO-NEXT: li a4, 255 +; RV32IA-WMO-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NEXT: sll a4, a4, a3 ; RV32IA-WMO-NEXT: zext.b a1, a1 ; RV32IA-WMO-NEXT: zext.b a2, a2 -; RV32IA-WMO-NEXT: sll a4, a4, a0 -; RV32IA-WMO-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: sll a1, a1, a3 +; RV32IA-WMO-NEXT: sll a2, a2, a3 ; RV32IA-WMO-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NEXT: lr.w.aq a2, (a3) -; RV32IA-WMO-NEXT: and a5, a2, a4 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: and a5, a3, a4 ; RV32IA-WMO-NEXT: bne a5, a1, .LBB5_3 ; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 -; RV32IA-WMO-NEXT: xor a5, a2, a0 +; RV32IA-WMO-NEXT: xor a5, a3, a2 ; RV32IA-WMO-NEXT: and a5, a5, a4 -; RV32IA-WMO-NEXT: xor a5, a2, a5 -; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-WMO-NEXT: bnez a5, .LBB5_1 ; RV32IA-WMO-NEXT: .LBB5_3: ; RV32IA-WMO-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: 
cmpxchg_i8_acq_rel_monotonic: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a3, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: li a4, 255 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a3 ; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a0 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a3 ; RV32IA-WMO-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) -; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-ZACAS-NEXT: and a5, a3, a4 ; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB5_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a2 ; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5 -; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB5_1 ; RV32IA-WMO-ZACAS-NEXT: .LBB5_3: ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-LABEL: cmpxchg_i8_acq_rel_monotonic: ; RV32IA-TSO: # %bb.0: -; RV32IA-TSO-NEXT: andi a3, a0, -4 -; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: slli a3, a0, 3 ; RV32IA-TSO-NEXT: li a4, 255 +; RV32IA-TSO-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NEXT: sll a4, a4, a3 ; RV32IA-TSO-NEXT: zext.b a1, a1 ; RV32IA-TSO-NEXT: zext.b a2, a2 -; RV32IA-TSO-NEXT: sll a4, a4, a0 -; RV32IA-TSO-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: sll a1, a1, a3 +; RV32IA-TSO-NEXT: sll a2, a2, a3 ; RV32IA-TSO-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NEXT: lr.w a2, (a3) -; RV32IA-TSO-NEXT: and a5, a2, a4 +; 
RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: and a5, a3, a4 ; RV32IA-TSO-NEXT: bne a5, a1, .LBB5_3 ; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 -; RV32IA-TSO-NEXT: xor a5, a2, a0 +; RV32IA-TSO-NEXT: xor a5, a3, a2 ; RV32IA-TSO-NEXT: and a5, a5, a4 -; RV32IA-TSO-NEXT: xor a5, a2, a5 -; RV32IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a0) ; RV32IA-TSO-NEXT: bnez a5, .LBB5_1 ; RV32IA-TSO-NEXT: .LBB5_3: ; RV32IA-TSO-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a3, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: li a4, 255 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a4, a4, a3 ; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a4, a0 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a3 ; RV32IA-TSO-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a3) -; RV32IA-TSO-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV32IA-TSO-ZACAS-NEXT: and a5, a3, a4 ; RV32IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB5_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a2 ; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a5 -; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a0) ; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB5_1 ; RV32IA-TSO-ZACAS-NEXT: .LBB5_3: ; RV32IA-TSO-ZACAS-NEXT: ret @@ -1168,46 +1168,46 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV64IA-WMO-LABEL: 
cmpxchg_i8_acq_rel_monotonic: ; RV64IA-WMO: # %bb.0: -; RV64IA-WMO-NEXT: andi a3, a0, -4 -; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: slli a3, a0, 3 ; RV64IA-WMO-NEXT: li a4, 255 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a4, a4, a3 ; RV64IA-WMO-NEXT: zext.b a1, a1 ; RV64IA-WMO-NEXT: zext.b a2, a2 -; RV64IA-WMO-NEXT: sllw a4, a4, a0 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a2, a2, a3 ; RV64IA-WMO-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w.aq a2, (a3) -; RV64IA-WMO-NEXT: and a5, a2, a4 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: and a5, a3, a4 ; RV64IA-WMO-NEXT: bne a5, a1, .LBB5_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 -; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: xor a5, a3, a2 ; RV64IA-WMO-NEXT: and a5, a5, a4 -; RV64IA-WMO-NEXT: xor a5, a2, a5 -; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-WMO-NEXT: bnez a5, .LBB5_1 ; RV64IA-WMO-NEXT: .LBB5_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a3, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: li a4, 255 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a3 ; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a0 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a3 ; RV64IA-WMO-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) -; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-ZACAS-NEXT: 
and a5, a3, a4 ; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB5_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a2 ; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5 -; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB5_1 ; RV64IA-WMO-ZACAS-NEXT: .LBB5_3: ; RV64IA-WMO-ZACAS-NEXT: ret @@ -1219,46 +1219,46 @@ define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV64IA-TSO-LABEL: cmpxchg_i8_acq_rel_monotonic: ; RV64IA-TSO: # %bb.0: -; RV64IA-TSO-NEXT: andi a3, a0, -4 -; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: slli a3, a0, 3 ; RV64IA-TSO-NEXT: li a4, 255 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a4, a4, a3 ; RV64IA-TSO-NEXT: zext.b a1, a1 ; RV64IA-TSO-NEXT: zext.b a2, a2 -; RV64IA-TSO-NEXT: sllw a4, a4, a0 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a2, a2, a3 ; RV64IA-TSO-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w a2, (a3) -; RV64IA-TSO-NEXT: and a5, a2, a4 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: and a5, a3, a4 ; RV64IA-TSO-NEXT: bne a5, a1, .LBB5_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 -; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: xor a5, a3, a2 ; RV64IA-TSO-NEXT: and a5, a5, a4 -; RV64IA-TSO-NEXT: xor a5, a2, a5 -; RV64IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a0) ; RV64IA-TSO-NEXT: bnez a5, .LBB5_1 ; RV64IA-TSO-NEXT: .LBB5_3: ; RV64IA-TSO-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: 
slli a3, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: li a4, 255 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a4, a4, a3 ; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a4, a0 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a3 ; RV64IA-TSO-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a3) -; RV64IA-TSO-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV64IA-TSO-ZACAS-NEXT: and a5, a3, a4 ; RV64IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB5_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a2 ; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a5 -; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a0) ; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB5_1 ; RV64IA-TSO-ZACAS-NEXT: .LBB5_3: ; RV64IA-TSO-ZACAS-NEXT: ret @@ -1287,92 +1287,92 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV32IA-WMO-LABEL: cmpxchg_i8_acq_rel_acquire: ; RV32IA-WMO: # %bb.0: -; RV32IA-WMO-NEXT: andi a3, a0, -4 -; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: slli a3, a0, 3 ; RV32IA-WMO-NEXT: li a4, 255 +; RV32IA-WMO-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NEXT: sll a4, a4, a3 ; RV32IA-WMO-NEXT: zext.b a1, a1 ; RV32IA-WMO-NEXT: zext.b a2, a2 -; RV32IA-WMO-NEXT: sll a4, a4, a0 -; RV32IA-WMO-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: sll a1, a1, a3 +; RV32IA-WMO-NEXT: sll a2, a2, a3 ; RV32IA-WMO-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NEXT: lr.w.aq a2, (a3) -; RV32IA-WMO-NEXT: and a5, a2, a4 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: and a5, a3, a4 ; 
RV32IA-WMO-NEXT: bne a5, a1, .LBB6_3 ; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 -; RV32IA-WMO-NEXT: xor a5, a2, a0 +; RV32IA-WMO-NEXT: xor a5, a3, a2 ; RV32IA-WMO-NEXT: and a5, a5, a4 -; RV32IA-WMO-NEXT: xor a5, a2, a5 -; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-WMO-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-WMO-NEXT: bnez a5, .LBB6_1 ; RV32IA-WMO-NEXT: .LBB6_3: ; RV32IA-WMO-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a3, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: li a4, 255 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a3 ; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a4, a0 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a3 ; RV32IA-WMO-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) -; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-ZACAS-NEXT: and a5, a3, a4 ; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB6_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a2 ; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5 -; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB6_1 ; RV32IA-WMO-ZACAS-NEXT: .LBB6_3: ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-LABEL: cmpxchg_i8_acq_rel_acquire: ; RV32IA-TSO: # %bb.0: -; RV32IA-TSO-NEXT: andi a3, a0, -4 -; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: slli a3, a0, 3 ; 
RV32IA-TSO-NEXT: li a4, 255 +; RV32IA-TSO-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NEXT: sll a4, a4, a3 ; RV32IA-TSO-NEXT: zext.b a1, a1 ; RV32IA-TSO-NEXT: zext.b a2, a2 -; RV32IA-TSO-NEXT: sll a4, a4, a0 -; RV32IA-TSO-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: sll a1, a1, a3 +; RV32IA-TSO-NEXT: sll a2, a2, a3 ; RV32IA-TSO-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NEXT: lr.w a2, (a3) -; RV32IA-TSO-NEXT: and a5, a2, a4 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: and a5, a3, a4 ; RV32IA-TSO-NEXT: bne a5, a1, .LBB6_3 ; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 -; RV32IA-TSO-NEXT: xor a5, a2, a0 +; RV32IA-TSO-NEXT: xor a5, a3, a2 ; RV32IA-TSO-NEXT: and a5, a5, a4 -; RV32IA-TSO-NEXT: xor a5, a2, a5 -; RV32IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV32IA-TSO-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a0) ; RV32IA-TSO-NEXT: bnez a5, .LBB6_1 ; RV32IA-TSO-NEXT: .LBB6_3: ; RV32IA-TSO-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a3, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: li a4, 255 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a4, a4, a3 ; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 ; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a4, a0 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a3 ; RV32IA-TSO-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a3) -; RV32IA-TSO-ZACAS-NEXT: and a5, a2, a4 +; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV32IA-TSO-ZACAS-NEXT: and a5, a3, a4 ; RV32IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB6_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: 
xor a5, a3, a2 ; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a5 -; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a0) ; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB6_1 ; RV32IA-TSO-ZACAS-NEXT: .LBB6_3: ; RV32IA-TSO-ZACAS-NEXT: ret @@ -1392,46 +1392,46 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV64IA-WMO-LABEL: cmpxchg_i8_acq_rel_acquire: ; RV64IA-WMO: # %bb.0: -; RV64IA-WMO-NEXT: andi a3, a0, -4 -; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: slli a3, a0, 3 ; RV64IA-WMO-NEXT: li a4, 255 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a4, a4, a3 ; RV64IA-WMO-NEXT: zext.b a1, a1 ; RV64IA-WMO-NEXT: zext.b a2, a2 -; RV64IA-WMO-NEXT: sllw a4, a4, a0 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a2, a2, a3 ; RV64IA-WMO-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w.aq a2, (a3) -; RV64IA-WMO-NEXT: and a5, a2, a4 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: and a5, a3, a4 ; RV64IA-WMO-NEXT: bne a5, a1, .LBB6_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 -; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: xor a5, a3, a2 ; RV64IA-WMO-NEXT: and a5, a5, a4 -; RV64IA-WMO-NEXT: xor a5, a2, a5 -; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-WMO-NEXT: bnez a5, .LBB6_1 ; RV64IA-WMO-NEXT: .LBB6_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a3, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: li a4, 255 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a3 ; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 ; 
RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a4, a0 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a3 ; RV64IA-WMO-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) -; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-ZACAS-NEXT: and a5, a3, a4 ; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB6_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a2 ; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5 -; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB6_1 ; RV64IA-WMO-ZACAS-NEXT: .LBB6_3: ; RV64IA-WMO-ZACAS-NEXT: ret @@ -1443,46 +1443,46 @@ define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV64IA-TSO-LABEL: cmpxchg_i8_acq_rel_acquire: ; RV64IA-TSO: # %bb.0: -; RV64IA-TSO-NEXT: andi a3, a0, -4 -; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: slli a3, a0, 3 ; RV64IA-TSO-NEXT: li a4, 255 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a4, a4, a3 ; RV64IA-TSO-NEXT: zext.b a1, a1 ; RV64IA-TSO-NEXT: zext.b a2, a2 -; RV64IA-TSO-NEXT: sllw a4, a4, a0 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a2, a2, a3 ; RV64IA-TSO-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w a2, (a3) -; RV64IA-TSO-NEXT: and a5, a2, a4 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: and a5, a3, a4 ; RV64IA-TSO-NEXT: bne a5, a1, .LBB6_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 -; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: xor a5, a3, a2 
; RV64IA-TSO-NEXT: and a5, a5, a4 -; RV64IA-TSO-NEXT: xor a5, a2, a5 -; RV64IA-TSO-NEXT: sc.w a5, a5, (a3) +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a0) ; RV64IA-TSO-NEXT: bnez a5, .LBB6_1 ; RV64IA-TSO-NEXT: .LBB6_3: ; RV64IA-TSO-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: slli a3, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: li a4, 255 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a4, a4, a3 ; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a4, a0 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a3 ; RV64IA-TSO-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a3) -; RV64IA-TSO-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV64IA-TSO-ZACAS-NEXT: and a5, a3, a4 ; RV64IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB6_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a2 ; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a5 -; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) +; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a0) ; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB6_1 ; RV64IA-TSO-ZACAS-NEXT: .LBB6_3: ; RV64IA-TSO-ZACAS-NEXT: ret @@ -1511,23 +1511,23 @@ define void @cmpxchg_i8_seq_cst_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV32IA-LABEL: cmpxchg_i8_seq_cst_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 +; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: li a4, 255 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: sll a4, a4, a3 ; RV32IA-NEXT: 
zext.b a1, a1 ; RV32IA-NEXT: zext.b a2, a2 -; RV32IA-NEXT: sll a4, a4, a0 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: sll a0, a2, a0 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a2, (a3) -; RV32IA-NEXT: and a5, a2, a4 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: and a5, a3, a4 ; RV32IA-NEXT: bne a5, a1, .LBB7_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 -; RV32IA-NEXT: xor a5, a2, a0 +; RV32IA-NEXT: xor a5, a3, a2 ; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a2, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB7_1 ; RV32IA-NEXT: .LBB7_3: ; RV32IA-NEXT: ret @@ -1547,46 +1547,46 @@ define void @cmpxchg_i8_seq_cst_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV64IA-WMO-LABEL: cmpxchg_i8_seq_cst_monotonic: ; RV64IA-WMO: # %bb.0: -; RV64IA-WMO-NEXT: andi a3, a0, -4 -; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: slli a3, a0, 3 ; RV64IA-WMO-NEXT: li a4, 255 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a4, a4, a3 ; RV64IA-WMO-NEXT: zext.b a1, a1 ; RV64IA-WMO-NEXT: zext.b a2, a2 -; RV64IA-WMO-NEXT: sllw a4, a4, a0 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a2, a2, a3 ; RV64IA-WMO-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w.aqrl a2, (a3) -; RV64IA-WMO-NEXT: and a5, a2, a4 +; RV64IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-WMO-NEXT: and a5, a3, a4 ; RV64IA-WMO-NEXT: bne a5, a1, .LBB7_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 -; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: xor a5, a3, a2 ; RV64IA-WMO-NEXT: and a5, a5, a4 -; RV64IA-WMO-NEXT: xor a5, a2, a5 -; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a0) ; 
RV64IA-WMO-NEXT: bnez a5, .LBB7_1 ; RV64IA-WMO-NEXT: .LBB7_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-ZACAS-LABEL: cmpxchg_i8_seq_cst_monotonic: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: slli a3, a0, 3 ; RV64IA-ZACAS-NEXT: li a4, 255 +; RV64IA-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-ZACAS-NEXT: sllw a4, a4, a3 ; RV64IA-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-ZACAS-NEXT: zext.b a2, a2 -; RV64IA-ZACAS-NEXT: sllw a4, a4, a0 -; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a3 ; RV64IA-ZACAS-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w.aqrl a2, (a3) -; RV64IA-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-ZACAS-NEXT: and a5, a3, a4 ; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB7_3 ; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 -; RV64IA-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-ZACAS-NEXT: xor a5, a3, a2 ; RV64IA-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-ZACAS-NEXT: xor a5, a2, a5 -; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-ZACAS-NEXT: bnez a5, .LBB7_1 ; RV64IA-ZACAS-NEXT: .LBB7_3: ; RV64IA-ZACAS-NEXT: ret @@ -1598,23 +1598,23 @@ define void @cmpxchg_i8_seq_cst_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV64IA-TSO-LABEL: cmpxchg_i8_seq_cst_monotonic: ; RV64IA-TSO: # %bb.0: -; RV64IA-TSO-NEXT: andi a3, a0, -4 -; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: slli a3, a0, 3 ; RV64IA-TSO-NEXT: li a4, 255 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a4, a4, a3 ; RV64IA-TSO-NEXT: zext.b a1, a1 ; RV64IA-TSO-NEXT: zext.b a2, a2 -; RV64IA-TSO-NEXT: sllw a4, a4, a0 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a2, a2, a3 ; RV64IA-TSO-NEXT: .LBB7_1: # 
=>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w.aqrl a2, (a3) -; RV64IA-TSO-NEXT: and a5, a2, a4 +; RV64IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-TSO-NEXT: and a5, a3, a4 ; RV64IA-TSO-NEXT: bne a5, a1, .LBB7_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 -; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: xor a5, a3, a2 ; RV64IA-TSO-NEXT: and a5, a5, a4 -; RV64IA-TSO-NEXT: xor a5, a2, a5 -; RV64IA-TSO-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-TSO-NEXT: bnez a5, .LBB7_1 ; RV64IA-TSO-NEXT: .LBB7_3: ; RV64IA-TSO-NEXT: ret @@ -1643,23 +1643,23 @@ define void @cmpxchg_i8_seq_cst_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV32IA-LABEL: cmpxchg_i8_seq_cst_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 +; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: li a4, 255 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: sll a4, a4, a3 ; RV32IA-NEXT: zext.b a1, a1 ; RV32IA-NEXT: zext.b a2, a2 -; RV32IA-NEXT: sll a4, a4, a0 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: sll a0, a2, a0 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a2, (a3) -; RV32IA-NEXT: and a5, a2, a4 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: and a5, a3, a4 ; RV32IA-NEXT: bne a5, a1, .LBB8_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 -; RV32IA-NEXT: xor a5, a2, a0 +; RV32IA-NEXT: xor a5, a3, a2 ; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a2, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB8_1 ; RV32IA-NEXT: .LBB8_3: ; RV32IA-NEXT: ret @@ -1679,46 +1679,46 @@ define void @cmpxchg_i8_seq_cst_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV64IA-WMO-LABEL: cmpxchg_i8_seq_cst_acquire: ; RV64IA-WMO: # %bb.0: -; RV64IA-WMO-NEXT: andi a3, a0, -4 -; 
RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: slli a3, a0, 3 ; RV64IA-WMO-NEXT: li a4, 255 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a4, a4, a3 ; RV64IA-WMO-NEXT: zext.b a1, a1 ; RV64IA-WMO-NEXT: zext.b a2, a2 -; RV64IA-WMO-NEXT: sllw a4, a4, a0 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a2, a2, a3 ; RV64IA-WMO-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w.aqrl a2, (a3) -; RV64IA-WMO-NEXT: and a5, a2, a4 +; RV64IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-WMO-NEXT: and a5, a3, a4 ; RV64IA-WMO-NEXT: bne a5, a1, .LBB8_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 -; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: xor a5, a3, a2 ; RV64IA-WMO-NEXT: and a5, a5, a4 -; RV64IA-WMO-NEXT: xor a5, a2, a5 -; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-WMO-NEXT: bnez a5, .LBB8_1 ; RV64IA-WMO-NEXT: .LBB8_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-ZACAS-LABEL: cmpxchg_i8_seq_cst_acquire: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: slli a3, a0, 3 ; RV64IA-ZACAS-NEXT: li a4, 255 +; RV64IA-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-ZACAS-NEXT: sllw a4, a4, a3 ; RV64IA-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-ZACAS-NEXT: zext.b a2, a2 -; RV64IA-ZACAS-NEXT: sllw a4, a4, a0 -; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a3 ; RV64IA-ZACAS-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w.aqrl a2, (a3) -; RV64IA-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-ZACAS-NEXT: and a5, a3, a4 ; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB8_3 ; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 -; RV64IA-ZACAS-NEXT: xor a5, a2, a0 +; 
RV64IA-ZACAS-NEXT: xor a5, a3, a2 ; RV64IA-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-ZACAS-NEXT: xor a5, a2, a5 -; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-ZACAS-NEXT: bnez a5, .LBB8_1 ; RV64IA-ZACAS-NEXT: .LBB8_3: ; RV64IA-ZACAS-NEXT: ret @@ -1730,23 +1730,23 @@ define void @cmpxchg_i8_seq_cst_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV64IA-TSO-LABEL: cmpxchg_i8_seq_cst_acquire: ; RV64IA-TSO: # %bb.0: -; RV64IA-TSO-NEXT: andi a3, a0, -4 -; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: slli a3, a0, 3 ; RV64IA-TSO-NEXT: li a4, 255 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a4, a4, a3 ; RV64IA-TSO-NEXT: zext.b a1, a1 ; RV64IA-TSO-NEXT: zext.b a2, a2 -; RV64IA-TSO-NEXT: sllw a4, a4, a0 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a2, a2, a3 ; RV64IA-TSO-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w.aqrl a2, (a3) -; RV64IA-TSO-NEXT: and a5, a2, a4 +; RV64IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-TSO-NEXT: and a5, a3, a4 ; RV64IA-TSO-NEXT: bne a5, a1, .LBB8_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 -; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: xor a5, a3, a2 ; RV64IA-TSO-NEXT: and a5, a5, a4 -; RV64IA-TSO-NEXT: xor a5, a2, a5 -; RV64IA-TSO-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-TSO-NEXT: bnez a5, .LBB8_1 ; RV64IA-TSO-NEXT: .LBB8_3: ; RV64IA-TSO-NEXT: ret @@ -1775,23 +1775,23 @@ define void @cmpxchg_i8_seq_cst_seq_cst(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV32IA-LABEL: cmpxchg_i8_seq_cst_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 +; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: li a4, 255 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: sll a4, a4, a3 ; RV32IA-NEXT: zext.b a1, a1 ; 
RV32IA-NEXT: zext.b a2, a2 -; RV32IA-NEXT: sll a4, a4, a0 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: sll a0, a2, a0 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a2, (a3) -; RV32IA-NEXT: and a5, a2, a4 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: and a5, a3, a4 ; RV32IA-NEXT: bne a5, a1, .LBB9_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 -; RV32IA-NEXT: xor a5, a2, a0 +; RV32IA-NEXT: xor a5, a3, a2 ; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a2, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a3) +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: sc.w.rl a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB9_1 ; RV32IA-NEXT: .LBB9_3: ; RV32IA-NEXT: ret @@ -1811,46 +1811,46 @@ define void @cmpxchg_i8_seq_cst_seq_cst(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV64IA-WMO-LABEL: cmpxchg_i8_seq_cst_seq_cst: ; RV64IA-WMO: # %bb.0: -; RV64IA-WMO-NEXT: andi a3, a0, -4 -; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: slli a3, a0, 3 ; RV64IA-WMO-NEXT: li a4, 255 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a4, a4, a3 ; RV64IA-WMO-NEXT: zext.b a1, a1 ; RV64IA-WMO-NEXT: zext.b a2, a2 -; RV64IA-WMO-NEXT: sllw a4, a4, a0 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a3 +; RV64IA-WMO-NEXT: sllw a2, a2, a3 ; RV64IA-WMO-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w.aqrl a2, (a3) -; RV64IA-WMO-NEXT: and a5, a2, a4 +; RV64IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-WMO-NEXT: and a5, a3, a4 ; RV64IA-WMO-NEXT: bne a5, a1, .LBB9_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 -; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: xor a5, a3, a2 ; RV64IA-WMO-NEXT: and a5, a5, a4 -; RV64IA-WMO-NEXT: xor a5, a2, a5 -; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-WMO-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-WMO-NEXT: bnez a5, 
.LBB9_1 ; RV64IA-WMO-NEXT: .LBB9_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-ZACAS-LABEL: cmpxchg_i8_seq_cst_seq_cst: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: slli a3, a0, 3 ; RV64IA-ZACAS-NEXT: li a4, 255 +; RV64IA-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-ZACAS-NEXT: sllw a4, a4, a3 ; RV64IA-ZACAS-NEXT: zext.b a1, a1 ; RV64IA-ZACAS-NEXT: zext.b a2, a2 -; RV64IA-ZACAS-NEXT: sllw a4, a4, a0 -; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a3 ; RV64IA-ZACAS-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w.aqrl a2, (a3) -; RV64IA-ZACAS-NEXT: and a5, a2, a4 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-ZACAS-NEXT: and a5, a3, a4 ; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB9_3 ; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 -; RV64IA-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-ZACAS-NEXT: xor a5, a3, a2 ; RV64IA-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-ZACAS-NEXT: xor a5, a2, a5 -; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-ZACAS-NEXT: bnez a5, .LBB9_1 ; RV64IA-ZACAS-NEXT: .LBB9_3: ; RV64IA-ZACAS-NEXT: ret @@ -1863,23 +1863,23 @@ define void @cmpxchg_i8_seq_cst_seq_cst(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; ; RV64IA-TSO-LABEL: cmpxchg_i8_seq_cst_seq_cst: ; RV64IA-TSO: # %bb.0: -; RV64IA-TSO-NEXT: andi a3, a0, -4 -; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: slli a3, a0, 3 ; RV64IA-TSO-NEXT: li a4, 255 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a4, a4, a3 ; RV64IA-TSO-NEXT: zext.b a1, a1 ; RV64IA-TSO-NEXT: zext.b a2, a2 -; RV64IA-TSO-NEXT: sllw a4, a4, a0 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a3 +; RV64IA-TSO-NEXT: sllw a2, a2, a3 ; RV64IA-TSO-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 
-; RV64IA-TSO-NEXT: lr.w.aqrl a2, (a3) -; RV64IA-TSO-NEXT: and a5, a2, a4 +; RV64IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-TSO-NEXT: and a5, a3, a4 ; RV64IA-TSO-NEXT: bne a5, a1, .LBB9_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 -; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: xor a5, a3, a2 ; RV64IA-TSO-NEXT: and a5, a5, a4 -; RV64IA-TSO-NEXT: xor a5, a2, a5 -; RV64IA-TSO-NEXT: sc.w.rl a5, a5, (a3) +; RV64IA-TSO-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NEXT: sc.w.rl a5, a5, (a0) ; RV64IA-TSO-NEXT: bnez a5, .LBB9_1 ; RV64IA-TSO-NEXT: .LBB9_3: ; RV64IA-TSO-NEXT: ret @@ -1909,24 +1909,24 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw ; ; RV32IA-LABEL: cmpxchg_i16_monotonic_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a5, a4, a0 -; RV32IA-NEXT: and a1, a1, a4 -; RV32IA-NEXT: and a2, a2, a4 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: sll a0, a2, a0 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: slli a4, a0, 3 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: sll a5, a3, a4 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a3 +; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: sll a2, a2, a4 ; RV32IA-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a2, (a3) -; RV32IA-NEXT: and a4, a2, a5 +; RV32IA-NEXT: lr.w a3, (a0) +; RV32IA-NEXT: and a4, a3, a5 ; RV32IA-NEXT: bne a4, a1, .LBB10_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 -; RV32IA-NEXT: xor a4, a2, a0 +; RV32IA-NEXT: xor a4, a3, a2 ; RV32IA-NEXT: and a4, a4, a5 -; RV32IA-NEXT: xor a4, a2, a4 -; RV32IA-NEXT: sc.w a4, a4, (a3) +; RV32IA-NEXT: xor a4, a3, a4 +; RV32IA-NEXT: sc.w a4, a4, (a0) ; RV32IA-NEXT: bnez a4, .LBB10_1 ; RV32IA-NEXT: .LBB10_3: ; RV32IA-NEXT: ret @@ -1946,48 +1946,48 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw 
; ; RV64IA-WMO-LABEL: cmpxchg_i16_monotonic_monotonic: ; RV64IA-WMO: # %bb.0: -; RV64IA-WMO-NEXT: andi a3, a0, -4 -; RV64IA-WMO-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NEXT: lui a4, 16 -; RV64IA-WMO-NEXT: addi a4, a4, -1 -; RV64IA-WMO-NEXT: sllw a5, a4, a0 -; RV64IA-WMO-NEXT: and a1, a1, a4 -; RV64IA-WMO-NEXT: and a2, a2, a4 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: slli a4, a0, 3 +; RV64IA-WMO-NEXT: addi a3, a3, -1 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a5, a3, a4 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: and a2, a2, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w a2, (a3) -; RV64IA-WMO-NEXT: and a4, a2, a5 +; RV64IA-WMO-NEXT: lr.w a3, (a0) +; RV64IA-WMO-NEXT: and a4, a3, a5 ; RV64IA-WMO-NEXT: bne a4, a1, .LBB10_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 -; RV64IA-WMO-NEXT: xor a4, a2, a0 +; RV64IA-WMO-NEXT: xor a4, a3, a2 ; RV64IA-WMO-NEXT: and a4, a4, a5 -; RV64IA-WMO-NEXT: xor a4, a2, a4 -; RV64IA-WMO-NEXT: sc.w a4, a4, (a3) +; RV64IA-WMO-NEXT: xor a4, a3, a4 +; RV64IA-WMO-NEXT: sc.w a4, a4, (a0) ; RV64IA-WMO-NEXT: bnez a4, .LBB10_1 ; RV64IA-WMO-NEXT: .LBB10_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-ZACAS-LABEL: cmpxchg_i16_monotonic_monotonic: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-ZACAS-NEXT: lui a4, 16 -; RV64IA-ZACAS-NEXT: addi a4, a4, -1 -; RV64IA-ZACAS-NEXT: sllw a5, a4, a0 -; RV64IA-ZACAS-NEXT: and a1, a1, a4 -; RV64IA-ZACAS-NEXT: and a2, a2, a4 -; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: lui a3, 16 +; RV64IA-ZACAS-NEXT: slli a4, a0, 3 +; RV64IA-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-ZACAS-NEXT: sllw a5, a3, a4 +; RV64IA-ZACAS-NEXT: and a1, a1, a3 +; 
RV64IA-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-ZACAS-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w a2, (a3) -; RV64IA-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-ZACAS-NEXT: lr.w a3, (a0) +; RV64IA-ZACAS-NEXT: and a4, a3, a5 ; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB10_3 ; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 -; RV64IA-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-ZACAS-NEXT: xor a4, a3, a2 ; RV64IA-ZACAS-NEXT: and a4, a4, a5 -; RV64IA-ZACAS-NEXT: xor a4, a2, a4 -; RV64IA-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV64IA-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-ZACAS-NEXT: sc.w a4, a4, (a0) ; RV64IA-ZACAS-NEXT: bnez a4, .LBB10_1 ; RV64IA-ZACAS-NEXT: .LBB10_3: ; RV64IA-ZACAS-NEXT: ret @@ -1999,24 +1999,24 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw ; ; RV64IA-TSO-LABEL: cmpxchg_i16_monotonic_monotonic: ; RV64IA-TSO: # %bb.0: -; RV64IA-TSO-NEXT: andi a3, a0, -4 -; RV64IA-TSO-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NEXT: lui a4, 16 -; RV64IA-TSO-NEXT: addi a4, a4, -1 -; RV64IA-TSO-NEXT: sllw a5, a4, a0 -; RV64IA-TSO-NEXT: and a1, a1, a4 -; RV64IA-TSO-NEXT: and a2, a2, a4 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: slli a4, a0, 3 +; RV64IA-TSO-NEXT: addi a3, a3, -1 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a5, a3, a4 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: and a2, a2, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w a2, (a3) -; RV64IA-TSO-NEXT: and a4, a2, a5 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: and a4, a3, a5 ; RV64IA-TSO-NEXT: bne a4, a1, .LBB10_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 -; RV64IA-TSO-NEXT: xor a4, a2, a0 +; RV64IA-TSO-NEXT: xor a4, a3, a2 ; RV64IA-TSO-NEXT: and 
a4, a4, a5 -; RV64IA-TSO-NEXT: xor a4, a2, a4 -; RV64IA-TSO-NEXT: sc.w a4, a4, (a3) +; RV64IA-TSO-NEXT: xor a4, a3, a4 +; RV64IA-TSO-NEXT: sc.w a4, a4, (a0) ; RV64IA-TSO-NEXT: bnez a4, .LBB10_1 ; RV64IA-TSO-NEXT: .LBB10_3: ; RV64IA-TSO-NEXT: ret @@ -2040,96 +2040,96 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; ; RV32IA-WMO-LABEL: cmpxchg_i16_acquire_monotonic: ; RV32IA-WMO: # %bb.0: -; RV32IA-WMO-NEXT: andi a3, a0, -4 -; RV32IA-WMO-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NEXT: lui a4, 16 -; RV32IA-WMO-NEXT: addi a4, a4, -1 -; RV32IA-WMO-NEXT: sll a5, a4, a0 -; RV32IA-WMO-NEXT: and a1, a1, a4 -; RV32IA-WMO-NEXT: and a2, a2, a4 -; RV32IA-WMO-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: slli a4, a0, 3 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NEXT: sll a5, a3, a4 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: and a2, a2, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a4 +; RV32IA-WMO-NEXT: sll a2, a2, a4 ; RV32IA-WMO-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NEXT: lr.w.aq a2, (a3) -; RV32IA-WMO-NEXT: and a4, a2, a5 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: and a4, a3, a5 ; RV32IA-WMO-NEXT: bne a4, a1, .LBB11_3 ; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; RV32IA-WMO-NEXT: xor a4, a2, a0 +; RV32IA-WMO-NEXT: xor a4, a3, a2 ; RV32IA-WMO-NEXT: and a4, a4, a5 -; RV32IA-WMO-NEXT: xor a4, a2, a4 -; RV32IA-WMO-NEXT: sc.w a4, a4, (a3) +; RV32IA-WMO-NEXT: xor a4, a3, a4 +; RV32IA-WMO-NEXT: sc.w a4, a4, (a0) ; RV32IA-WMO-NEXT: bnez a4, .LBB11_1 ; RV32IA-WMO-NEXT: .LBB11_3: ; RV32IA-WMO-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a4, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a4, a4, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a5, a4, a0 -; 
RV32IA-WMO-ZACAS-NEXT: and a1, a1, a4 -; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a4 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a4, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a4 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-WMO-ZACAS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) -; RV32IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-ZACAS-NEXT: and a4, a3, a5 ; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB11_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a3, a2 ; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5 -; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a4 -; RV32IA-WMO-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: xor a4, a3, a4 +; RV32IA-WMO-ZACAS-NEXT: sc.w a4, a4, (a0) ; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB11_1 ; RV32IA-WMO-ZACAS-NEXT: .LBB11_3: ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-LABEL: cmpxchg_i16_acquire_monotonic: ; RV32IA-TSO: # %bb.0: -; RV32IA-TSO-NEXT: andi a3, a0, -4 -; RV32IA-TSO-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NEXT: lui a4, 16 -; RV32IA-TSO-NEXT: addi a4, a4, -1 -; RV32IA-TSO-NEXT: sll a5, a4, a0 -; RV32IA-TSO-NEXT: and a1, a1, a4 -; RV32IA-TSO-NEXT: and a2, a2, a4 -; RV32IA-TSO-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: slli a4, a0, 3 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NEXT: sll a5, a3, a4 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: and a2, a2, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a4 +; RV32IA-TSO-NEXT: sll a2, a2, a4 ; RV32IA-TSO-NEXT: 
.LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NEXT: lr.w a2, (a3) -; RV32IA-TSO-NEXT: and a4, a2, a5 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: and a4, a3, a5 ; RV32IA-TSO-NEXT: bne a4, a1, .LBB11_3 ; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; RV32IA-TSO-NEXT: xor a4, a2, a0 +; RV32IA-TSO-NEXT: xor a4, a3, a2 ; RV32IA-TSO-NEXT: and a4, a4, a5 -; RV32IA-TSO-NEXT: xor a4, a2, a4 -; RV32IA-TSO-NEXT: sc.w a4, a4, (a3) +; RV32IA-TSO-NEXT: xor a4, a3, a4 +; RV32IA-TSO-NEXT: sc.w a4, a4, (a0) ; RV32IA-TSO-NEXT: bnez a4, .LBB11_1 ; RV32IA-TSO-NEXT: .LBB11_3: ; RV32IA-TSO-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a4, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a4, a4, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a5, a4, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a4 -; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a4 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a4, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a4 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-TSO-ZACAS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a3) -; RV32IA-TSO-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV32IA-TSO-ZACAS-NEXT: and a4, a3, a5 ; RV32IA-TSO-ZACAS-NEXT: bne a4, a1, .LBB11_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: xor a4, a3, a2 ; RV32IA-TSO-ZACAS-NEXT: and a4, a4, a5 -; RV32IA-TSO-ZACAS-NEXT: xor a4, a2, a4 -; RV32IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a3) 
+; RV32IA-TSO-ZACAS-NEXT: xor a4, a3, a4 +; RV32IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a0) ; RV32IA-TSO-ZACAS-NEXT: bnez a4, .LBB11_1 ; RV32IA-TSO-ZACAS-NEXT: .LBB11_3: ; RV32IA-TSO-ZACAS-NEXT: ret @@ -2149,48 +2149,48 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; ; RV64IA-WMO-LABEL: cmpxchg_i16_acquire_monotonic: ; RV64IA-WMO: # %bb.0: -; RV64IA-WMO-NEXT: andi a3, a0, -4 -; RV64IA-WMO-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NEXT: lui a4, 16 -; RV64IA-WMO-NEXT: addi a4, a4, -1 -; RV64IA-WMO-NEXT: sllw a5, a4, a0 -; RV64IA-WMO-NEXT: and a1, a1, a4 -; RV64IA-WMO-NEXT: and a2, a2, a4 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: slli a4, a0, 3 +; RV64IA-WMO-NEXT: addi a3, a3, -1 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a5, a3, a4 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: and a2, a2, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w.aq a2, (a3) -; RV64IA-WMO-NEXT: and a4, a2, a5 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: and a4, a3, a5 ; RV64IA-WMO-NEXT: bne a4, a1, .LBB11_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; RV64IA-WMO-NEXT: xor a4, a2, a0 +; RV64IA-WMO-NEXT: xor a4, a3, a2 ; RV64IA-WMO-NEXT: and a4, a4, a5 -; RV64IA-WMO-NEXT: xor a4, a2, a4 -; RV64IA-WMO-NEXT: sc.w a4, a4, (a3) +; RV64IA-WMO-NEXT: xor a4, a3, a4 +; RV64IA-WMO-NEXT: sc.w a4, a4, (a0) ; RV64IA-WMO-NEXT: bnez a4, .LBB11_1 ; RV64IA-WMO-NEXT: .LBB11_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a4, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a4, a4, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a5, a4, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a4 -; 
RV64IA-WMO-ZACAS-NEXT: and a2, a2, a4 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 +; RV64IA-WMO-ZACAS-NEXT: slli a4, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a4 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-ZACAS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) -; RV64IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-ZACAS-NEXT: and a4, a3, a5 ; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB11_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a3, a2 ; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5 -; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a4 -; RV64IA-WMO-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-WMO-ZACAS-NEXT: sc.w a4, a4, (a0) ; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB11_1 ; RV64IA-WMO-ZACAS-NEXT: .LBB11_3: ; RV64IA-WMO-ZACAS-NEXT: ret @@ -2202,48 +2202,48 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; ; RV64IA-TSO-LABEL: cmpxchg_i16_acquire_monotonic: ; RV64IA-TSO: # %bb.0: -; RV64IA-TSO-NEXT: andi a3, a0, -4 -; RV64IA-TSO-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NEXT: lui a4, 16 -; RV64IA-TSO-NEXT: addi a4, a4, -1 -; RV64IA-TSO-NEXT: sllw a5, a4, a0 -; RV64IA-TSO-NEXT: and a1, a1, a4 -; RV64IA-TSO-NEXT: and a2, a2, a4 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: slli a4, a0, 3 +; RV64IA-TSO-NEXT: addi a3, a3, -1 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a5, a3, a4 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: and a2, a2, a3 +; 
RV64IA-TSO-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w a2, (a3) -; RV64IA-TSO-NEXT: and a4, a2, a5 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: and a4, a3, a5 ; RV64IA-TSO-NEXT: bne a4, a1, .LBB11_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; RV64IA-TSO-NEXT: xor a4, a2, a0 +; RV64IA-TSO-NEXT: xor a4, a3, a2 ; RV64IA-TSO-NEXT: and a4, a4, a5 -; RV64IA-TSO-NEXT: xor a4, a2, a4 -; RV64IA-TSO-NEXT: sc.w a4, a4, (a3) +; RV64IA-TSO-NEXT: xor a4, a3, a4 +; RV64IA-TSO-NEXT: sc.w a4, a4, (a0) ; RV64IA-TSO-NEXT: bnez a4, .LBB11_1 ; RV64IA-TSO-NEXT: .LBB11_3: ; RV64IA-TSO-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a4, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a4, a4, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a5, a4, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a4 -; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a4 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 +; RV64IA-TSO-ZACAS-NEXT: slli a4, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a4 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-ZACAS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a3) -; RV64IA-TSO-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV64IA-TSO-ZACAS-NEXT: and a4, a3, a5 ; RV64IA-TSO-ZACAS-NEXT: bne a4, a1, .LBB11_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: xor a4, a3, a2 ; RV64IA-TSO-ZACAS-NEXT: and 
a4, a4, a5 -; RV64IA-TSO-ZACAS-NEXT: xor a4, a2, a4 -; RV64IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV64IA-TSO-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a0) ; RV64IA-TSO-ZACAS-NEXT: bnez a4, .LBB11_1 ; RV64IA-TSO-ZACAS-NEXT: .LBB11_3: ; RV64IA-TSO-ZACAS-NEXT: ret @@ -2272,96 +2272,96 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; ; RV32IA-WMO-LABEL: cmpxchg_i16_acquire_acquire: ; RV32IA-WMO: # %bb.0: -; RV32IA-WMO-NEXT: andi a3, a0, -4 -; RV32IA-WMO-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NEXT: lui a4, 16 -; RV32IA-WMO-NEXT: addi a4, a4, -1 -; RV32IA-WMO-NEXT: sll a5, a4, a0 -; RV32IA-WMO-NEXT: and a1, a1, a4 -; RV32IA-WMO-NEXT: and a2, a2, a4 -; RV32IA-WMO-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: slli a4, a0, 3 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NEXT: sll a5, a3, a4 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: and a2, a2, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a4 +; RV32IA-WMO-NEXT: sll a2, a2, a4 ; RV32IA-WMO-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NEXT: lr.w.aq a2, (a3) -; RV32IA-WMO-NEXT: and a4, a2, a5 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: and a4, a3, a5 ; RV32IA-WMO-NEXT: bne a4, a1, .LBB12_3 ; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 -; RV32IA-WMO-NEXT: xor a4, a2, a0 +; RV32IA-WMO-NEXT: xor a4, a3, a2 ; RV32IA-WMO-NEXT: and a4, a4, a5 -; RV32IA-WMO-NEXT: xor a4, a2, a4 -; RV32IA-WMO-NEXT: sc.w a4, a4, (a3) +; RV32IA-WMO-NEXT: xor a4, a3, a4 +; RV32IA-WMO-NEXT: sc.w a4, a4, (a0) ; RV32IA-WMO-NEXT: bnez a4, .LBB12_1 ; RV32IA-WMO-NEXT: .LBB12_3: ; RV32IA-WMO-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_acquire: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a4, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a4, a4, -1 -; 
RV32IA-WMO-ZACAS-NEXT: sll a5, a4, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a4 -; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a4 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a4, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a4 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-WMO-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) -; RV32IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-ZACAS-NEXT: and a4, a3, a5 ; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB12_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a3, a2 ; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5 -; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a4 -; RV32IA-WMO-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: xor a4, a3, a4 +; RV32IA-WMO-ZACAS-NEXT: sc.w a4, a4, (a0) ; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB12_1 ; RV32IA-WMO-ZACAS-NEXT: .LBB12_3: ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-LABEL: cmpxchg_i16_acquire_acquire: ; RV32IA-TSO: # %bb.0: -; RV32IA-TSO-NEXT: andi a3, a0, -4 -; RV32IA-TSO-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NEXT: lui a4, 16 -; RV32IA-TSO-NEXT: addi a4, a4, -1 -; RV32IA-TSO-NEXT: sll a5, a4, a0 -; RV32IA-TSO-NEXT: and a1, a1, a4 -; RV32IA-TSO-NEXT: and a2, a2, a4 -; RV32IA-TSO-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: slli a4, a0, 3 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NEXT: sll a5, a3, a4 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: and a2, a2, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a4 +; 
RV32IA-TSO-NEXT: sll a2, a2, a4 ; RV32IA-TSO-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NEXT: lr.w a2, (a3) -; RV32IA-TSO-NEXT: and a4, a2, a5 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: and a4, a3, a5 ; RV32IA-TSO-NEXT: bne a4, a1, .LBB12_3 ; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 -; RV32IA-TSO-NEXT: xor a4, a2, a0 +; RV32IA-TSO-NEXT: xor a4, a3, a2 ; RV32IA-TSO-NEXT: and a4, a4, a5 -; RV32IA-TSO-NEXT: xor a4, a2, a4 -; RV32IA-TSO-NEXT: sc.w a4, a4, (a3) +; RV32IA-TSO-NEXT: xor a4, a3, a4 +; RV32IA-TSO-NEXT: sc.w a4, a4, (a0) ; RV32IA-TSO-NEXT: bnez a4, .LBB12_1 ; RV32IA-TSO-NEXT: .LBB12_3: ; RV32IA-TSO-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i16_acquire_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a4, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a4, a4, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a5, a4, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a4 -; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a4 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a4, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a4 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-TSO-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a3) -; RV32IA-TSO-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV32IA-TSO-ZACAS-NEXT: and a4, a3, a5 ; RV32IA-TSO-ZACAS-NEXT: bne a4, a1, .LBB12_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: xor a4, a3, a2 ; RV32IA-TSO-ZACAS-NEXT: and a4, a4, a5 -; RV32IA-TSO-ZACAS-NEXT: xor a4, 
a2, a4 -; RV32IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV32IA-TSO-ZACAS-NEXT: xor a4, a3, a4 +; RV32IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a0) ; RV32IA-TSO-ZACAS-NEXT: bnez a4, .LBB12_1 ; RV32IA-TSO-ZACAS-NEXT: .LBB12_3: ; RV32IA-TSO-ZACAS-NEXT: ret @@ -2381,48 +2381,48 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; ; RV64IA-WMO-LABEL: cmpxchg_i16_acquire_acquire: ; RV64IA-WMO: # %bb.0: -; RV64IA-WMO-NEXT: andi a3, a0, -4 -; RV64IA-WMO-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NEXT: lui a4, 16 -; RV64IA-WMO-NEXT: addi a4, a4, -1 -; RV64IA-WMO-NEXT: sllw a5, a4, a0 -; RV64IA-WMO-NEXT: and a1, a1, a4 -; RV64IA-WMO-NEXT: and a2, a2, a4 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: slli a4, a0, 3 +; RV64IA-WMO-NEXT: addi a3, a3, -1 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a5, a3, a4 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: and a2, a2, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w.aq a2, (a3) -; RV64IA-WMO-NEXT: and a4, a2, a5 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: and a4, a3, a5 ; RV64IA-WMO-NEXT: bne a4, a1, .LBB12_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 -; RV64IA-WMO-NEXT: xor a4, a2, a0 +; RV64IA-WMO-NEXT: xor a4, a3, a2 ; RV64IA-WMO-NEXT: and a4, a4, a5 -; RV64IA-WMO-NEXT: xor a4, a2, a4 -; RV64IA-WMO-NEXT: sc.w a4, a4, (a3) +; RV64IA-WMO-NEXT: xor a4, a3, a4 +; RV64IA-WMO-NEXT: sc.w a4, a4, (a0) ; RV64IA-WMO-NEXT: bnez a4, .LBB12_1 ; RV64IA-WMO-NEXT: .LBB12_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_acquire: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a4, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a4, a4, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a5, a4, a0 -; 
RV64IA-WMO-ZACAS-NEXT: and a1, a1, a4 -; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a4 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 +; RV64IA-WMO-ZACAS-NEXT: slli a4, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a4 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) -; RV64IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-ZACAS-NEXT: and a4, a3, a5 ; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB12_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a3, a2 ; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5 -; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a4 -; RV64IA-WMO-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-WMO-ZACAS-NEXT: sc.w a4, a4, (a0) ; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB12_1 ; RV64IA-WMO-ZACAS-NEXT: .LBB12_3: ; RV64IA-WMO-ZACAS-NEXT: ret @@ -2434,48 +2434,48 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; ; RV64IA-TSO-LABEL: cmpxchg_i16_acquire_acquire: ; RV64IA-TSO: # %bb.0: -; RV64IA-TSO-NEXT: andi a3, a0, -4 -; RV64IA-TSO-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NEXT: lui a4, 16 -; RV64IA-TSO-NEXT: addi a4, a4, -1 -; RV64IA-TSO-NEXT: sllw a5, a4, a0 -; RV64IA-TSO-NEXT: and a1, a1, a4 -; RV64IA-TSO-NEXT: and a2, a2, a4 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: slli a4, a0, 3 +; RV64IA-TSO-NEXT: addi a3, a3, -1 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a5, a3, a4 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; 
RV64IA-TSO-NEXT: and a2, a2, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w a2, (a3) -; RV64IA-TSO-NEXT: and a4, a2, a5 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: and a4, a3, a5 ; RV64IA-TSO-NEXT: bne a4, a1, .LBB12_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 -; RV64IA-TSO-NEXT: xor a4, a2, a0 +; RV64IA-TSO-NEXT: xor a4, a3, a2 ; RV64IA-TSO-NEXT: and a4, a4, a5 -; RV64IA-TSO-NEXT: xor a4, a2, a4 -; RV64IA-TSO-NEXT: sc.w a4, a4, (a3) +; RV64IA-TSO-NEXT: xor a4, a3, a4 +; RV64IA-TSO-NEXT: sc.w a4, a4, (a0) ; RV64IA-TSO-NEXT: bnez a4, .LBB12_1 ; RV64IA-TSO-NEXT: .LBB12_3: ; RV64IA-TSO-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i16_acquire_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a4, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a4, a4, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a5, a4, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a4 -; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a4 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 +; RV64IA-TSO-ZACAS-NEXT: slli a4, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a4 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a3) -; RV64IA-TSO-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV64IA-TSO-ZACAS-NEXT: and a4, a3, a5 ; RV64IA-TSO-ZACAS-NEXT: bne a4, a1, .LBB12_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: xor a4, a3, 
a2 ; RV64IA-TSO-ZACAS-NEXT: and a4, a4, a5 -; RV64IA-TSO-ZACAS-NEXT: xor a4, a2, a4 -; RV64IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV64IA-TSO-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a0) ; RV64IA-TSO-ZACAS-NEXT: bnez a4, .LBB12_1 ; RV64IA-TSO-ZACAS-NEXT: .LBB12_3: ; RV64IA-TSO-ZACAS-NEXT: ret @@ -2504,96 +2504,96 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; ; RV32IA-WMO-LABEL: cmpxchg_i16_release_monotonic: ; RV32IA-WMO: # %bb.0: -; RV32IA-WMO-NEXT: andi a3, a0, -4 -; RV32IA-WMO-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NEXT: lui a4, 16 -; RV32IA-WMO-NEXT: addi a4, a4, -1 -; RV32IA-WMO-NEXT: sll a5, a4, a0 -; RV32IA-WMO-NEXT: and a1, a1, a4 -; RV32IA-WMO-NEXT: and a2, a2, a4 -; RV32IA-WMO-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: slli a4, a0, 3 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NEXT: sll a5, a3, a4 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: and a2, a2, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a4 +; RV32IA-WMO-NEXT: sll a2, a2, a4 ; RV32IA-WMO-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NEXT: lr.w a2, (a3) -; RV32IA-WMO-NEXT: and a4, a2, a5 +; RV32IA-WMO-NEXT: lr.w a3, (a0) +; RV32IA-WMO-NEXT: and a4, a3, a5 ; RV32IA-WMO-NEXT: bne a4, a1, .LBB13_3 ; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 -; RV32IA-WMO-NEXT: xor a4, a2, a0 +; RV32IA-WMO-NEXT: xor a4, a3, a2 ; RV32IA-WMO-NEXT: and a4, a4, a5 -; RV32IA-WMO-NEXT: xor a4, a2, a4 -; RV32IA-WMO-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-WMO-NEXT: xor a4, a3, a4 +; RV32IA-WMO-NEXT: sc.w.rl a4, a4, (a0) ; RV32IA-WMO-NEXT: bnez a4, .LBB13_1 ; RV32IA-WMO-NEXT: .LBB13_3: ; RV32IA-WMO-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_monotonic: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a4, 16 -; 
RV32IA-WMO-ZACAS-NEXT: addi a4, a4, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a5, a4, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a4 -; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a4 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a4, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a4 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-WMO-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w a2, (a3) -; RV32IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: lr.w a3, (a0) +; RV32IA-WMO-ZACAS-NEXT: and a4, a3, a5 ; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB13_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a3, a2 ; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5 -; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a4 -; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: xor a4, a3, a4 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a0) ; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB13_1 ; RV32IA-WMO-ZACAS-NEXT: .LBB13_3: ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-LABEL: cmpxchg_i16_release_monotonic: ; RV32IA-TSO: # %bb.0: -; RV32IA-TSO-NEXT: andi a3, a0, -4 -; RV32IA-TSO-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NEXT: lui a4, 16 -; RV32IA-TSO-NEXT: addi a4, a4, -1 -; RV32IA-TSO-NEXT: sll a5, a4, a0 -; RV32IA-TSO-NEXT: and a1, a1, a4 -; RV32IA-TSO-NEXT: and a2, a2, a4 -; RV32IA-TSO-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: slli a4, a0, 3 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NEXT: sll a5, a3, a4 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: and a2, a2, a3 +; 
RV32IA-TSO-NEXT: sll a1, a1, a4 +; RV32IA-TSO-NEXT: sll a2, a2, a4 ; RV32IA-TSO-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NEXT: lr.w a2, (a3) -; RV32IA-TSO-NEXT: and a4, a2, a5 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: and a4, a3, a5 ; RV32IA-TSO-NEXT: bne a4, a1, .LBB13_3 ; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 -; RV32IA-TSO-NEXT: xor a4, a2, a0 +; RV32IA-TSO-NEXT: xor a4, a3, a2 ; RV32IA-TSO-NEXT: and a4, a4, a5 -; RV32IA-TSO-NEXT: xor a4, a2, a4 -; RV32IA-TSO-NEXT: sc.w a4, a4, (a3) +; RV32IA-TSO-NEXT: xor a4, a3, a4 +; RV32IA-TSO-NEXT: sc.w a4, a4, (a0) ; RV32IA-TSO-NEXT: bnez a4, .LBB13_1 ; RV32IA-TSO-NEXT: .LBB13_3: ; RV32IA-TSO-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i16_release_monotonic: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a4, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a4, a4, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a5, a4, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a4 -; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a4 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a4, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a4 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-TSO-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a3) -; RV32IA-TSO-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV32IA-TSO-ZACAS-NEXT: and a4, a3, a5 ; RV32IA-TSO-ZACAS-NEXT: bne a4, a1, .LBB13_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: xor a4, a3, a2 ; RV32IA-TSO-ZACAS-NEXT: and a4, a4, a5 
-; RV32IA-TSO-ZACAS-NEXT: xor a4, a2, a4 -; RV32IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV32IA-TSO-ZACAS-NEXT: xor a4, a3, a4 +; RV32IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a0) ; RV32IA-TSO-ZACAS-NEXT: bnez a4, .LBB13_1 ; RV32IA-TSO-ZACAS-NEXT: .LBB13_3: ; RV32IA-TSO-ZACAS-NEXT: ret @@ -2613,48 +2613,48 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; ; RV64IA-WMO-LABEL: cmpxchg_i16_release_monotonic: ; RV64IA-WMO: # %bb.0: -; RV64IA-WMO-NEXT: andi a3, a0, -4 -; RV64IA-WMO-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NEXT: lui a4, 16 -; RV64IA-WMO-NEXT: addi a4, a4, -1 -; RV64IA-WMO-NEXT: sllw a5, a4, a0 -; RV64IA-WMO-NEXT: and a1, a1, a4 -; RV64IA-WMO-NEXT: and a2, a2, a4 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: slli a4, a0, 3 +; RV64IA-WMO-NEXT: addi a3, a3, -1 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a5, a3, a4 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: and a2, a2, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w a2, (a3) -; RV64IA-WMO-NEXT: and a4, a2, a5 +; RV64IA-WMO-NEXT: lr.w a3, (a0) +; RV64IA-WMO-NEXT: and a4, a3, a5 ; RV64IA-WMO-NEXT: bne a4, a1, .LBB13_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 -; RV64IA-WMO-NEXT: xor a4, a2, a0 +; RV64IA-WMO-NEXT: xor a4, a3, a2 ; RV64IA-WMO-NEXT: and a4, a4, a5 -; RV64IA-WMO-NEXT: xor a4, a2, a4 -; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-WMO-NEXT: xor a4, a3, a4 +; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a0) ; RV64IA-WMO-NEXT: bnez a4, .LBB13_1 ; RV64IA-WMO-NEXT: .LBB13_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_monotonic: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a4, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a4, a4, -1 -; 
RV64IA-WMO-ZACAS-NEXT: sllw a5, a4, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a4 -; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a4 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 +; RV64IA-WMO-ZACAS-NEXT: slli a4, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a4 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w a2, (a3) -; RV64IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: lr.w a3, (a0) +; RV64IA-WMO-ZACAS-NEXT: and a4, a3, a5 ; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB13_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a3, a2 ; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5 -; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a4 -; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a0) ; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB13_1 ; RV64IA-WMO-ZACAS-NEXT: .LBB13_3: ; RV64IA-WMO-ZACAS-NEXT: ret @@ -2666,48 +2666,48 @@ define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; ; RV64IA-TSO-LABEL: cmpxchg_i16_release_monotonic: ; RV64IA-TSO: # %bb.0: -; RV64IA-TSO-NEXT: andi a3, a0, -4 -; RV64IA-TSO-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NEXT: lui a4, 16 -; RV64IA-TSO-NEXT: addi a4, a4, -1 -; RV64IA-TSO-NEXT: sllw a5, a4, a0 -; RV64IA-TSO-NEXT: and a1, a1, a4 -; RV64IA-TSO-NEXT: and a2, a2, a4 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: slli a4, a0, 3 +; RV64IA-TSO-NEXT: addi a3, a3, -1 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a5, a3, a4 +; 
RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: and a2, a2, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w a2, (a3) -; RV64IA-TSO-NEXT: and a4, a2, a5 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: and a4, a3, a5 ; RV64IA-TSO-NEXT: bne a4, a1, .LBB13_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 -; RV64IA-TSO-NEXT: xor a4, a2, a0 +; RV64IA-TSO-NEXT: xor a4, a3, a2 ; RV64IA-TSO-NEXT: and a4, a4, a5 -; RV64IA-TSO-NEXT: xor a4, a2, a4 -; RV64IA-TSO-NEXT: sc.w a4, a4, (a3) +; RV64IA-TSO-NEXT: xor a4, a3, a4 +; RV64IA-TSO-NEXT: sc.w a4, a4, (a0) ; RV64IA-TSO-NEXT: bnez a4, .LBB13_1 ; RV64IA-TSO-NEXT: .LBB13_3: ; RV64IA-TSO-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i16_release_monotonic: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a4, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a4, a4, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a5, a4, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a4 -; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a4 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 +; RV64IA-TSO-ZACAS-NEXT: slli a4, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a4 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a3) -; RV64IA-TSO-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV64IA-TSO-ZACAS-NEXT: and a4, a3, a5 ; RV64IA-TSO-ZACAS-NEXT: bne a4, a1, .LBB13_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a4, a2, a0 +; 
RV64IA-TSO-ZACAS-NEXT: xor a4, a3, a2 ; RV64IA-TSO-ZACAS-NEXT: and a4, a4, a5 -; RV64IA-TSO-ZACAS-NEXT: xor a4, a2, a4 -; RV64IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV64IA-TSO-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a0) ; RV64IA-TSO-ZACAS-NEXT: bnez a4, .LBB13_1 ; RV64IA-TSO-ZACAS-NEXT: .LBB13_3: ; RV64IA-TSO-ZACAS-NEXT: ret @@ -2736,96 +2736,96 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; ; RV32IA-WMO-LABEL: cmpxchg_i16_release_acquire: ; RV32IA-WMO: # %bb.0: -; RV32IA-WMO-NEXT: andi a3, a0, -4 -; RV32IA-WMO-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NEXT: lui a4, 16 -; RV32IA-WMO-NEXT: addi a4, a4, -1 -; RV32IA-WMO-NEXT: sll a5, a4, a0 -; RV32IA-WMO-NEXT: and a1, a1, a4 -; RV32IA-WMO-NEXT: and a2, a2, a4 -; RV32IA-WMO-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: slli a4, a0, 3 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NEXT: sll a5, a3, a4 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: and a2, a2, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a4 +; RV32IA-WMO-NEXT: sll a2, a2, a4 ; RV32IA-WMO-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NEXT: lr.w.aq a2, (a3) -; RV32IA-WMO-NEXT: and a4, a2, a5 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: and a4, a3, a5 ; RV32IA-WMO-NEXT: bne a4, a1, .LBB14_3 ; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 -; RV32IA-WMO-NEXT: xor a4, a2, a0 +; RV32IA-WMO-NEXT: xor a4, a3, a2 ; RV32IA-WMO-NEXT: and a4, a4, a5 -; RV32IA-WMO-NEXT: xor a4, a2, a4 -; RV32IA-WMO-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-WMO-NEXT: xor a4, a3, a4 +; RV32IA-WMO-NEXT: sc.w.rl a4, a4, (a0) ; RV32IA-WMO-NEXT: bnez a4, .LBB14_1 ; RV32IA-WMO-NEXT: .LBB14_3: ; RV32IA-WMO-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_acquire: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; 
RV32IA-WMO-ZACAS-NEXT: lui a4, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a4, a4, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a5, a4, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a4 -; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a4 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a4, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a4 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-WMO-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) -; RV32IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-ZACAS-NEXT: and a4, a3, a5 ; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB14_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a3, a2 ; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5 -; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a4 -; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: xor a4, a3, a4 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a0) ; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB14_1 ; RV32IA-WMO-ZACAS-NEXT: .LBB14_3: ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-LABEL: cmpxchg_i16_release_acquire: ; RV32IA-TSO: # %bb.0: -; RV32IA-TSO-NEXT: andi a3, a0, -4 -; RV32IA-TSO-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NEXT: lui a4, 16 -; RV32IA-TSO-NEXT: addi a4, a4, -1 -; RV32IA-TSO-NEXT: sll a5, a4, a0 -; RV32IA-TSO-NEXT: and a1, a1, a4 -; RV32IA-TSO-NEXT: and a2, a2, a4 -; RV32IA-TSO-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: slli a4, a0, 3 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NEXT: sll a5, a3, a4 +; RV32IA-TSO-NEXT: and a1, a1, a3 
+; RV32IA-TSO-NEXT: and a2, a2, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a4 +; RV32IA-TSO-NEXT: sll a2, a2, a4 ; RV32IA-TSO-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NEXT: lr.w a2, (a3) -; RV32IA-TSO-NEXT: and a4, a2, a5 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: and a4, a3, a5 ; RV32IA-TSO-NEXT: bne a4, a1, .LBB14_3 ; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 -; RV32IA-TSO-NEXT: xor a4, a2, a0 +; RV32IA-TSO-NEXT: xor a4, a3, a2 ; RV32IA-TSO-NEXT: and a4, a4, a5 -; RV32IA-TSO-NEXT: xor a4, a2, a4 -; RV32IA-TSO-NEXT: sc.w a4, a4, (a3) +; RV32IA-TSO-NEXT: xor a4, a3, a4 +; RV32IA-TSO-NEXT: sc.w a4, a4, (a0) ; RV32IA-TSO-NEXT: bnez a4, .LBB14_1 ; RV32IA-TSO-NEXT: .LBB14_3: ; RV32IA-TSO-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i16_release_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a4, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a4, a4, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a5, a4, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a4 -; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a4 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a4, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a4 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-TSO-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a3) -; RV32IA-TSO-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV32IA-TSO-ZACAS-NEXT: and a4, a3, a5 ; RV32IA-TSO-ZACAS-NEXT: bne a4, a1, .LBB14_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: xor a4, a3, a2 ; 
RV32IA-TSO-ZACAS-NEXT: and a4, a4, a5 -; RV32IA-TSO-ZACAS-NEXT: xor a4, a2, a4 -; RV32IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV32IA-TSO-ZACAS-NEXT: xor a4, a3, a4 +; RV32IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a0) ; RV32IA-TSO-ZACAS-NEXT: bnez a4, .LBB14_1 ; RV32IA-TSO-ZACAS-NEXT: .LBB14_3: ; RV32IA-TSO-ZACAS-NEXT: ret @@ -2845,48 +2845,48 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; ; RV64IA-WMO-LABEL: cmpxchg_i16_release_acquire: ; RV64IA-WMO: # %bb.0: -; RV64IA-WMO-NEXT: andi a3, a0, -4 -; RV64IA-WMO-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NEXT: lui a4, 16 -; RV64IA-WMO-NEXT: addi a4, a4, -1 -; RV64IA-WMO-NEXT: sllw a5, a4, a0 -; RV64IA-WMO-NEXT: and a1, a1, a4 -; RV64IA-WMO-NEXT: and a2, a2, a4 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: slli a4, a0, 3 +; RV64IA-WMO-NEXT: addi a3, a3, -1 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a5, a3, a4 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: and a2, a2, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w.aq a2, (a3) -; RV64IA-WMO-NEXT: and a4, a2, a5 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: and a4, a3, a5 ; RV64IA-WMO-NEXT: bne a4, a1, .LBB14_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 -; RV64IA-WMO-NEXT: xor a4, a2, a0 +; RV64IA-WMO-NEXT: xor a4, a3, a2 ; RV64IA-WMO-NEXT: and a4, a4, a5 -; RV64IA-WMO-NEXT: xor a4, a2, a4 -; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-WMO-NEXT: xor a4, a3, a4 +; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a0) ; RV64IA-WMO-NEXT: bnez a4, .LBB14_1 ; RV64IA-WMO-NEXT: .LBB14_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_acquire: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a4, 16 -; 
RV64IA-WMO-ZACAS-NEXT: addi a4, a4, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a5, a4, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a4 -; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a4 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 +; RV64IA-WMO-ZACAS-NEXT: slli a4, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a4 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) -; RV64IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-ZACAS-NEXT: and a4, a3, a5 ; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB14_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a3, a2 ; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5 -; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a4 -; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a0) ; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB14_1 ; RV64IA-WMO-ZACAS-NEXT: .LBB14_3: ; RV64IA-WMO-ZACAS-NEXT: ret @@ -2898,48 +2898,48 @@ define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; ; RV64IA-TSO-LABEL: cmpxchg_i16_release_acquire: ; RV64IA-TSO: # %bb.0: -; RV64IA-TSO-NEXT: andi a3, a0, -4 -; RV64IA-TSO-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NEXT: lui a4, 16 -; RV64IA-TSO-NEXT: addi a4, a4, -1 -; RV64IA-TSO-NEXT: sllw a5, a4, a0 -; RV64IA-TSO-NEXT: and a1, a1, a4 -; RV64IA-TSO-NEXT: and a2, a2, a4 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: slli a4, a0, 3 +; RV64IA-TSO-NEXT: addi a3, a3, -1 +; RV64IA-TSO-NEXT: andi a0, 
a0, -4 +; RV64IA-TSO-NEXT: sllw a5, a3, a4 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: and a2, a2, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w a2, (a3) -; RV64IA-TSO-NEXT: and a4, a2, a5 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: and a4, a3, a5 ; RV64IA-TSO-NEXT: bne a4, a1, .LBB14_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 -; RV64IA-TSO-NEXT: xor a4, a2, a0 +; RV64IA-TSO-NEXT: xor a4, a3, a2 ; RV64IA-TSO-NEXT: and a4, a4, a5 -; RV64IA-TSO-NEXT: xor a4, a2, a4 -; RV64IA-TSO-NEXT: sc.w a4, a4, (a3) +; RV64IA-TSO-NEXT: xor a4, a3, a4 +; RV64IA-TSO-NEXT: sc.w a4, a4, (a0) ; RV64IA-TSO-NEXT: bnez a4, .LBB14_1 ; RV64IA-TSO-NEXT: .LBB14_3: ; RV64IA-TSO-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i16_release_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a4, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a4, a4, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a5, a4, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a4 -; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a4 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 +; RV64IA-TSO-ZACAS-NEXT: slli a4, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a4 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a3) -; RV64IA-TSO-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV64IA-TSO-ZACAS-NEXT: and a4, a3, a5 ; RV64IA-TSO-ZACAS-NEXT: bne a4, a1, .LBB14_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 
-; RV64IA-TSO-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: xor a4, a3, a2 ; RV64IA-TSO-ZACAS-NEXT: and a4, a4, a5 -; RV64IA-TSO-ZACAS-NEXT: xor a4, a2, a4 -; RV64IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV64IA-TSO-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a0) ; RV64IA-TSO-ZACAS-NEXT: bnez a4, .LBB14_1 ; RV64IA-TSO-ZACAS-NEXT: .LBB14_3: ; RV64IA-TSO-ZACAS-NEXT: ret @@ -2968,96 +2968,96 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; ; RV32IA-WMO-LABEL: cmpxchg_i16_acq_rel_monotonic: ; RV32IA-WMO: # %bb.0: -; RV32IA-WMO-NEXT: andi a3, a0, -4 -; RV32IA-WMO-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NEXT: lui a4, 16 -; RV32IA-WMO-NEXT: addi a4, a4, -1 -; RV32IA-WMO-NEXT: sll a5, a4, a0 -; RV32IA-WMO-NEXT: and a1, a1, a4 -; RV32IA-WMO-NEXT: and a2, a2, a4 -; RV32IA-WMO-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: slli a4, a0, 3 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NEXT: sll a5, a3, a4 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: and a2, a2, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a4 +; RV32IA-WMO-NEXT: sll a2, a2, a4 ; RV32IA-WMO-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NEXT: lr.w.aq a2, (a3) -; RV32IA-WMO-NEXT: and a4, a2, a5 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: and a4, a3, a5 ; RV32IA-WMO-NEXT: bne a4, a1, .LBB15_3 ; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; RV32IA-WMO-NEXT: xor a4, a2, a0 +; RV32IA-WMO-NEXT: xor a4, a3, a2 ; RV32IA-WMO-NEXT: and a4, a4, a5 -; RV32IA-WMO-NEXT: xor a4, a2, a4 -; RV32IA-WMO-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-WMO-NEXT: xor a4, a3, a4 +; RV32IA-WMO-NEXT: sc.w.rl a4, a4, (a0) ; RV32IA-WMO-NEXT: bnez a4, .LBB15_1 ; RV32IA-WMO-NEXT: .LBB15_3: ; RV32IA-WMO-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 -; 
RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a4, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a4, a4, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a5, a4, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a4 -; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a4 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a4, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a4 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-WMO-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) -; RV32IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-ZACAS-NEXT: and a4, a3, a5 ; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB15_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a3, a2 ; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5 -; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a4 -; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: xor a4, a3, a4 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a0) ; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB15_1 ; RV32IA-WMO-ZACAS-NEXT: .LBB15_3: ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-LABEL: cmpxchg_i16_acq_rel_monotonic: ; RV32IA-TSO: # %bb.0: -; RV32IA-TSO-NEXT: andi a3, a0, -4 -; RV32IA-TSO-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NEXT: lui a4, 16 -; RV32IA-TSO-NEXT: addi a4, a4, -1 -; RV32IA-TSO-NEXT: sll a5, a4, a0 -; RV32IA-TSO-NEXT: and a1, a1, a4 -; RV32IA-TSO-NEXT: and a2, a2, a4 -; RV32IA-TSO-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: slli a4, a0, 3 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NEXT: sll 
a5, a3, a4 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: and a2, a2, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a4 +; RV32IA-TSO-NEXT: sll a2, a2, a4 ; RV32IA-TSO-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NEXT: lr.w a2, (a3) -; RV32IA-TSO-NEXT: and a4, a2, a5 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: and a4, a3, a5 ; RV32IA-TSO-NEXT: bne a4, a1, .LBB15_3 ; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; RV32IA-TSO-NEXT: xor a4, a2, a0 +; RV32IA-TSO-NEXT: xor a4, a3, a2 ; RV32IA-TSO-NEXT: and a4, a4, a5 -; RV32IA-TSO-NEXT: xor a4, a2, a4 -; RV32IA-TSO-NEXT: sc.w a4, a4, (a3) +; RV32IA-TSO-NEXT: xor a4, a3, a4 +; RV32IA-TSO-NEXT: sc.w a4, a4, (a0) ; RV32IA-TSO-NEXT: bnez a4, .LBB15_1 ; RV32IA-TSO-NEXT: .LBB15_3: ; RV32IA-TSO-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a4, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a4, a4, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a5, a4, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a4 -; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a4 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a4, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a4 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-TSO-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a3) -; RV32IA-TSO-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV32IA-TSO-ZACAS-NEXT: and a4, a3, a5 ; RV32IA-TSO-ZACAS-NEXT: bne a4, a1, .LBB15_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a4, a2, 
a0 +; RV32IA-TSO-ZACAS-NEXT: xor a4, a3, a2 ; RV32IA-TSO-ZACAS-NEXT: and a4, a4, a5 -; RV32IA-TSO-ZACAS-NEXT: xor a4, a2, a4 -; RV32IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV32IA-TSO-ZACAS-NEXT: xor a4, a3, a4 +; RV32IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a0) ; RV32IA-TSO-ZACAS-NEXT: bnez a4, .LBB15_1 ; RV32IA-TSO-ZACAS-NEXT: .LBB15_3: ; RV32IA-TSO-ZACAS-NEXT: ret @@ -3077,48 +3077,48 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; ; RV64IA-WMO-LABEL: cmpxchg_i16_acq_rel_monotonic: ; RV64IA-WMO: # %bb.0: -; RV64IA-WMO-NEXT: andi a3, a0, -4 -; RV64IA-WMO-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NEXT: lui a4, 16 -; RV64IA-WMO-NEXT: addi a4, a4, -1 -; RV64IA-WMO-NEXT: sllw a5, a4, a0 -; RV64IA-WMO-NEXT: and a1, a1, a4 -; RV64IA-WMO-NEXT: and a2, a2, a4 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: slli a4, a0, 3 +; RV64IA-WMO-NEXT: addi a3, a3, -1 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a5, a3, a4 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: and a2, a2, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w.aq a2, (a3) -; RV64IA-WMO-NEXT: and a4, a2, a5 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: and a4, a3, a5 ; RV64IA-WMO-NEXT: bne a4, a1, .LBB15_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; RV64IA-WMO-NEXT: xor a4, a2, a0 +; RV64IA-WMO-NEXT: xor a4, a3, a2 ; RV64IA-WMO-NEXT: and a4, a4, a5 -; RV64IA-WMO-NEXT: xor a4, a2, a4 -; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-WMO-NEXT: xor a4, a3, a4 +; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a0) ; RV64IA-WMO-NEXT: bnez a4, .LBB15_1 ; RV64IA-WMO-NEXT: .LBB15_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 
-; RV64IA-WMO-ZACAS-NEXT: lui a4, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a4, a4, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a5, a4, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a4 -; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a4 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 +; RV64IA-WMO-ZACAS-NEXT: slli a4, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a4 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) -; RV64IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-ZACAS-NEXT: and a4, a3, a5 ; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB15_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a3, a2 ; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5 -; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a4 -; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a0) ; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB15_1 ; RV64IA-WMO-ZACAS-NEXT: .LBB15_3: ; RV64IA-WMO-ZACAS-NEXT: ret @@ -3130,48 +3130,48 @@ define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; ; RV64IA-TSO-LABEL: cmpxchg_i16_acq_rel_monotonic: ; RV64IA-TSO: # %bb.0: -; RV64IA-TSO-NEXT: andi a3, a0, -4 -; RV64IA-TSO-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NEXT: lui a4, 16 -; RV64IA-TSO-NEXT: addi a4, a4, -1 -; RV64IA-TSO-NEXT: sllw a5, a4, a0 -; RV64IA-TSO-NEXT: and a1, a1, a4 -; RV64IA-TSO-NEXT: and a2, a2, a4 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: slli a4, a0, 3 +; RV64IA-TSO-NEXT: 
addi a3, a3, -1 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a5, a3, a4 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: and a2, a2, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w a2, (a3) -; RV64IA-TSO-NEXT: and a4, a2, a5 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: and a4, a3, a5 ; RV64IA-TSO-NEXT: bne a4, a1, .LBB15_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; RV64IA-TSO-NEXT: xor a4, a2, a0 +; RV64IA-TSO-NEXT: xor a4, a3, a2 ; RV64IA-TSO-NEXT: and a4, a4, a5 -; RV64IA-TSO-NEXT: xor a4, a2, a4 -; RV64IA-TSO-NEXT: sc.w a4, a4, (a3) +; RV64IA-TSO-NEXT: xor a4, a3, a4 +; RV64IA-TSO-NEXT: sc.w a4, a4, (a0) ; RV64IA-TSO-NEXT: bnez a4, .LBB15_1 ; RV64IA-TSO-NEXT: .LBB15_3: ; RV64IA-TSO-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a4, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a4, a4, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a5, a4, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a4 -; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a4 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 +; RV64IA-TSO-ZACAS-NEXT: slli a4, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a4 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a3) -; RV64IA-TSO-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV64IA-TSO-ZACAS-NEXT: and a4, a3, a5 ; RV64IA-TSO-ZACAS-NEXT: bne a4, a1, .LBB15_3 ; 
RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: xor a4, a3, a2 ; RV64IA-TSO-ZACAS-NEXT: and a4, a4, a5 -; RV64IA-TSO-ZACAS-NEXT: xor a4, a2, a4 -; RV64IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV64IA-TSO-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a0) ; RV64IA-TSO-ZACAS-NEXT: bnez a4, .LBB15_1 ; RV64IA-TSO-ZACAS-NEXT: .LBB15_3: ; RV64IA-TSO-ZACAS-NEXT: ret @@ -3200,96 +3200,96 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; ; RV32IA-WMO-LABEL: cmpxchg_i16_acq_rel_acquire: ; RV32IA-WMO: # %bb.0: -; RV32IA-WMO-NEXT: andi a3, a0, -4 -; RV32IA-WMO-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NEXT: lui a4, 16 -; RV32IA-WMO-NEXT: addi a4, a4, -1 -; RV32IA-WMO-NEXT: sll a5, a4, a0 -; RV32IA-WMO-NEXT: and a1, a1, a4 -; RV32IA-WMO-NEXT: and a2, a2, a4 -; RV32IA-WMO-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: slli a4, a0, 3 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NEXT: sll a5, a3, a4 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: and a2, a2, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a4 +; RV32IA-WMO-NEXT: sll a2, a2, a4 ; RV32IA-WMO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NEXT: lr.w.aq a2, (a3) -; RV32IA-WMO-NEXT: and a4, a2, a5 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: and a4, a3, a5 ; RV32IA-WMO-NEXT: bne a4, a1, .LBB16_3 ; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 -; RV32IA-WMO-NEXT: xor a4, a2, a0 +; RV32IA-WMO-NEXT: xor a4, a3, a2 ; RV32IA-WMO-NEXT: and a4, a4, a5 -; RV32IA-WMO-NEXT: xor a4, a2, a4 -; RV32IA-WMO-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-WMO-NEXT: xor a4, a3, a4 +; RV32IA-WMO-NEXT: sc.w.rl a4, a4, (a0) ; RV32IA-WMO-NEXT: bnez a4, .LBB16_1 ; RV32IA-WMO-NEXT: .LBB16_3: ; RV32IA-WMO-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire: ; RV32IA-WMO-ZACAS: 
# %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a4, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a4, a4, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a5, a4, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a4 -; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a4 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a4, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a4 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-WMO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) -; RV32IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-ZACAS-NEXT: and a4, a3, a5 ; RV32IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB16_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: xor a4, a3, a2 ; RV32IA-WMO-ZACAS-NEXT: and a4, a4, a5 -; RV32IA-WMO-ZACAS-NEXT: xor a4, a2, a4 -; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: xor a4, a3, a4 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a0) ; RV32IA-WMO-ZACAS-NEXT: bnez a4, .LBB16_1 ; RV32IA-WMO-ZACAS-NEXT: .LBB16_3: ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-LABEL: cmpxchg_i16_acq_rel_acquire: ; RV32IA-TSO: # %bb.0: -; RV32IA-TSO-NEXT: andi a3, a0, -4 -; RV32IA-TSO-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NEXT: lui a4, 16 -; RV32IA-TSO-NEXT: addi a4, a4, -1 -; RV32IA-TSO-NEXT: sll a5, a4, a0 -; RV32IA-TSO-NEXT: and a1, a1, a4 -; RV32IA-TSO-NEXT: and a2, a2, a4 -; RV32IA-TSO-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: slli a4, a0, 3 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; 
RV32IA-TSO-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NEXT: sll a5, a3, a4 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: and a2, a2, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a4 +; RV32IA-TSO-NEXT: sll a2, a2, a4 ; RV32IA-TSO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NEXT: lr.w a2, (a3) -; RV32IA-TSO-NEXT: and a4, a2, a5 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: and a4, a3, a5 ; RV32IA-TSO-NEXT: bne a4, a1, .LBB16_3 ; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 -; RV32IA-TSO-NEXT: xor a4, a2, a0 +; RV32IA-TSO-NEXT: xor a4, a3, a2 ; RV32IA-TSO-NEXT: and a4, a4, a5 -; RV32IA-TSO-NEXT: xor a4, a2, a4 -; RV32IA-TSO-NEXT: sc.w a4, a4, (a3) +; RV32IA-TSO-NEXT: xor a4, a3, a4 +; RV32IA-TSO-NEXT: sc.w a4, a4, (a0) ; RV32IA-TSO-NEXT: bnez a4, .LBB16_1 ; RV32IA-TSO-NEXT: .LBB16_3: ; RV32IA-TSO-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a4, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a4, a4, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a5, a4, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a4 -; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a4 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a4, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a4 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a4 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a4 ; RV32IA-TSO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a3) -; RV32IA-TSO-ZACAS-NEXT: and a4, a2, a5 +; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV32IA-TSO-ZACAS-NEXT: and a4, a3, a5 ; RV32IA-TSO-ZACAS-NEXT: bne a4, a1, .LBB16_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: 
Header=BB16_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a4, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: xor a4, a3, a2 ; RV32IA-TSO-ZACAS-NEXT: and a4, a4, a5 -; RV32IA-TSO-ZACAS-NEXT: xor a4, a2, a4 -; RV32IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV32IA-TSO-ZACAS-NEXT: xor a4, a3, a4 +; RV32IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a0) ; RV32IA-TSO-ZACAS-NEXT: bnez a4, .LBB16_1 ; RV32IA-TSO-ZACAS-NEXT: .LBB16_3: ; RV32IA-TSO-ZACAS-NEXT: ret @@ -3309,48 +3309,48 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; ; RV64IA-WMO-LABEL: cmpxchg_i16_acq_rel_acquire: ; RV64IA-WMO: # %bb.0: -; RV64IA-WMO-NEXT: andi a3, a0, -4 -; RV64IA-WMO-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NEXT: lui a4, 16 -; RV64IA-WMO-NEXT: addi a4, a4, -1 -; RV64IA-WMO-NEXT: sllw a5, a4, a0 -; RV64IA-WMO-NEXT: and a1, a1, a4 -; RV64IA-WMO-NEXT: and a2, a2, a4 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: slli a4, a0, 3 +; RV64IA-WMO-NEXT: addi a3, a3, -1 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a5, a3, a4 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: and a2, a2, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w.aq a2, (a3) -; RV64IA-WMO-NEXT: and a4, a2, a5 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: and a4, a3, a5 ; RV64IA-WMO-NEXT: bne a4, a1, .LBB16_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 -; RV64IA-WMO-NEXT: xor a4, a2, a0 +; RV64IA-WMO-NEXT: xor a4, a3, a2 ; RV64IA-WMO-NEXT: and a4, a4, a5 -; RV64IA-WMO-NEXT: xor a4, a2, a4 -; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-WMO-NEXT: xor a4, a3, a4 +; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a0) ; RV64IA-WMO-NEXT: bnez a4, .LBB16_1 ; RV64IA-WMO-NEXT: .LBB16_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: 
andi a3, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a4, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a4, a4, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a5, a4, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a4 -; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a4 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 +; RV64IA-WMO-ZACAS-NEXT: slli a4, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a4 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a3) -; RV64IA-WMO-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-ZACAS-NEXT: and a4, a3, a5 ; RV64IA-WMO-ZACAS-NEXT: bne a4, a1, .LBB16_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: xor a4, a3, a2 ; RV64IA-WMO-ZACAS-NEXT: and a4, a4, a5 -; RV64IA-WMO-ZACAS-NEXT: xor a4, a2, a4 -; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a4, a4, (a0) ; RV64IA-WMO-ZACAS-NEXT: bnez a4, .LBB16_1 ; RV64IA-WMO-ZACAS-NEXT: .LBB16_3: ; RV64IA-WMO-ZACAS-NEXT: ret @@ -3362,48 +3362,48 @@ define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; ; RV64IA-TSO-LABEL: cmpxchg_i16_acq_rel_acquire: ; RV64IA-TSO: # %bb.0: -; RV64IA-TSO-NEXT: andi a3, a0, -4 -; RV64IA-TSO-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NEXT: lui a4, 16 -; RV64IA-TSO-NEXT: addi a4, a4, -1 -; RV64IA-TSO-NEXT: sllw a5, a4, a0 -; RV64IA-TSO-NEXT: and a1, a1, a4 -; RV64IA-TSO-NEXT: and a2, a2, a4 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: lui a3, 16 +; 
RV64IA-TSO-NEXT: slli a4, a0, 3 +; RV64IA-TSO-NEXT: addi a3, a3, -1 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a5, a3, a4 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: and a2, a2, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w a2, (a3) -; RV64IA-TSO-NEXT: and a4, a2, a5 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: and a4, a3, a5 ; RV64IA-TSO-NEXT: bne a4, a1, .LBB16_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 -; RV64IA-TSO-NEXT: xor a4, a2, a0 +; RV64IA-TSO-NEXT: xor a4, a3, a2 ; RV64IA-TSO-NEXT: and a4, a4, a5 -; RV64IA-TSO-NEXT: xor a4, a2, a4 -; RV64IA-TSO-NEXT: sc.w a4, a4, (a3) +; RV64IA-TSO-NEXT: xor a4, a3, a4 +; RV64IA-TSO-NEXT: sc.w a4, a4, (a0) ; RV64IA-TSO-NEXT: bnez a4, .LBB16_1 ; RV64IA-TSO-NEXT: .LBB16_3: ; RV64IA-TSO-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a4, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a4, a4, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a5, a4, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a4 -; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a4 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 +; RV64IA-TSO-ZACAS-NEXT: slli a4, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a4 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a3) -; RV64IA-TSO-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a0) +; RV64IA-TSO-ZACAS-NEXT: and a4, a3, a5 ; 
RV64IA-TSO-ZACAS-NEXT: bne a4, a1, .LBB16_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: xor a4, a3, a2 ; RV64IA-TSO-ZACAS-NEXT: and a4, a4, a5 -; RV64IA-TSO-ZACAS-NEXT: xor a4, a2, a4 -; RV64IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a3) +; RV64IA-TSO-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-TSO-ZACAS-NEXT: sc.w a4, a4, (a0) ; RV64IA-TSO-ZACAS-NEXT: bnez a4, .LBB16_1 ; RV64IA-TSO-ZACAS-NEXT: .LBB16_3: ; RV64IA-TSO-ZACAS-NEXT: ret @@ -3432,24 +3432,24 @@ define void @cmpxchg_i16_seq_cst_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; ; RV32IA-LABEL: cmpxchg_i16_seq_cst_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a5, a4, a0 -; RV32IA-NEXT: and a1, a1, a4 -; RV32IA-NEXT: and a2, a2, a4 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: sll a0, a2, a0 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: slli a4, a0, 3 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: sll a5, a3, a4 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a3 +; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: sll a2, a2, a4 ; RV32IA-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a2, (a3) -; RV32IA-NEXT: and a4, a2, a5 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: and a4, a3, a5 ; RV32IA-NEXT: bne a4, a1, .LBB17_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 -; RV32IA-NEXT: xor a4, a2, a0 +; RV32IA-NEXT: xor a4, a3, a2 ; RV32IA-NEXT: and a4, a4, a5 -; RV32IA-NEXT: xor a4, a2, a4 -; RV32IA-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-NEXT: xor a4, a3, a4 +; RV32IA-NEXT: sc.w.rl a4, a4, (a0) ; RV32IA-NEXT: bnez a4, .LBB17_1 ; RV32IA-NEXT: .LBB17_3: ; RV32IA-NEXT: ret @@ -3469,48 +3469,48 @@ define void @cmpxchg_i16_seq_cst_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; ; RV64IA-WMO-LABEL: cmpxchg_i16_seq_cst_monotonic: ; RV64IA-WMO: 
# %bb.0: -; RV64IA-WMO-NEXT: andi a3, a0, -4 -; RV64IA-WMO-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NEXT: lui a4, 16 -; RV64IA-WMO-NEXT: addi a4, a4, -1 -; RV64IA-WMO-NEXT: sllw a5, a4, a0 -; RV64IA-WMO-NEXT: and a1, a1, a4 -; RV64IA-WMO-NEXT: and a2, a2, a4 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: slli a4, a0, 3 +; RV64IA-WMO-NEXT: addi a3, a3, -1 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a5, a3, a4 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: and a2, a2, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w.aqrl a2, (a3) -; RV64IA-WMO-NEXT: and a4, a2, a5 +; RV64IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-WMO-NEXT: and a4, a3, a5 ; RV64IA-WMO-NEXT: bne a4, a1, .LBB17_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 -; RV64IA-WMO-NEXT: xor a4, a2, a0 +; RV64IA-WMO-NEXT: xor a4, a3, a2 ; RV64IA-WMO-NEXT: and a4, a4, a5 -; RV64IA-WMO-NEXT: xor a4, a2, a4 -; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-WMO-NEXT: xor a4, a3, a4 +; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a0) ; RV64IA-WMO-NEXT: bnez a4, .LBB17_1 ; RV64IA-WMO-NEXT: .LBB17_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-ZACAS-LABEL: cmpxchg_i16_seq_cst_monotonic: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-ZACAS-NEXT: lui a4, 16 -; RV64IA-ZACAS-NEXT: addi a4, a4, -1 -; RV64IA-ZACAS-NEXT: sllw a5, a4, a0 -; RV64IA-ZACAS-NEXT: and a1, a1, a4 -; RV64IA-ZACAS-NEXT: and a2, a2, a4 -; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: lui a3, 16 +; RV64IA-ZACAS-NEXT: slli a4, a0, 3 +; RV64IA-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-ZACAS-NEXT: sllw a5, a3, a4 +; RV64IA-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-ZACAS-NEXT: sllw 
a1, a1, a4 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-ZACAS-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w.aqrl a2, (a3) -; RV64IA-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-ZACAS-NEXT: and a4, a3, a5 ; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB17_3 ; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 -; RV64IA-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-ZACAS-NEXT: xor a4, a3, a2 ; RV64IA-ZACAS-NEXT: and a4, a4, a5 -; RV64IA-ZACAS-NEXT: xor a4, a2, a4 -; RV64IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-ZACAS-NEXT: sc.w.rl a4, a4, (a0) ; RV64IA-ZACAS-NEXT: bnez a4, .LBB17_1 ; RV64IA-ZACAS-NEXT: .LBB17_3: ; RV64IA-ZACAS-NEXT: ret @@ -3522,24 +3522,24 @@ define void @cmpxchg_i16_seq_cst_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin ; ; RV64IA-TSO-LABEL: cmpxchg_i16_seq_cst_monotonic: ; RV64IA-TSO: # %bb.0: -; RV64IA-TSO-NEXT: andi a3, a0, -4 -; RV64IA-TSO-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NEXT: lui a4, 16 -; RV64IA-TSO-NEXT: addi a4, a4, -1 -; RV64IA-TSO-NEXT: sllw a5, a4, a0 -; RV64IA-TSO-NEXT: and a1, a1, a4 -; RV64IA-TSO-NEXT: and a2, a2, a4 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: slli a4, a0, 3 +; RV64IA-TSO-NEXT: addi a3, a3, -1 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a5, a3, a4 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: and a2, a2, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w.aqrl a2, (a3) -; RV64IA-TSO-NEXT: and a4, a2, a5 +; RV64IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-TSO-NEXT: and a4, a3, a5 ; RV64IA-TSO-NEXT: bne a4, a1, .LBB17_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 -; RV64IA-TSO-NEXT: xor a4, a2, a0 +; RV64IA-TSO-NEXT: xor a4, a3, a2 ; RV64IA-TSO-NEXT: and a4, a4, a5 -; RV64IA-TSO-NEXT: xor 
a4, a2, a4 -; RV64IA-TSO-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-TSO-NEXT: xor a4, a3, a4 +; RV64IA-TSO-NEXT: sc.w.rl a4, a4, (a0) ; RV64IA-TSO-NEXT: bnez a4, .LBB17_1 ; RV64IA-TSO-NEXT: .LBB17_3: ; RV64IA-TSO-NEXT: ret @@ -3568,24 +3568,24 @@ define void @cmpxchg_i16_seq_cst_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; ; RV32IA-LABEL: cmpxchg_i16_seq_cst_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a5, a4, a0 -; RV32IA-NEXT: and a1, a1, a4 -; RV32IA-NEXT: and a2, a2, a4 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: sll a0, a2, a0 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: slli a4, a0, 3 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: sll a5, a3, a4 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a3 +; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: sll a2, a2, a4 ; RV32IA-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a2, (a3) -; RV32IA-NEXT: and a4, a2, a5 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: and a4, a3, a5 ; RV32IA-NEXT: bne a4, a1, .LBB18_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 -; RV32IA-NEXT: xor a4, a2, a0 +; RV32IA-NEXT: xor a4, a3, a2 ; RV32IA-NEXT: and a4, a4, a5 -; RV32IA-NEXT: xor a4, a2, a4 -; RV32IA-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-NEXT: xor a4, a3, a4 +; RV32IA-NEXT: sc.w.rl a4, a4, (a0) ; RV32IA-NEXT: bnez a4, .LBB18_1 ; RV32IA-NEXT: .LBB18_3: ; RV32IA-NEXT: ret @@ -3605,48 +3605,48 @@ define void @cmpxchg_i16_seq_cst_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; ; RV64IA-WMO-LABEL: cmpxchg_i16_seq_cst_acquire: ; RV64IA-WMO: # %bb.0: -; RV64IA-WMO-NEXT: andi a3, a0, -4 -; RV64IA-WMO-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NEXT: lui a4, 16 -; RV64IA-WMO-NEXT: addi a4, a4, -1 -; RV64IA-WMO-NEXT: sllw a5, a4, a0 -; RV64IA-WMO-NEXT: and a1, a1, a4 -; RV64IA-WMO-NEXT: and a2, a2, a4 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; 
RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: slli a4, a0, 3 +; RV64IA-WMO-NEXT: addi a3, a3, -1 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a5, a3, a4 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: and a2, a2, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w.aqrl a2, (a3) -; RV64IA-WMO-NEXT: and a4, a2, a5 +; RV64IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-WMO-NEXT: and a4, a3, a5 ; RV64IA-WMO-NEXT: bne a4, a1, .LBB18_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 -; RV64IA-WMO-NEXT: xor a4, a2, a0 +; RV64IA-WMO-NEXT: xor a4, a3, a2 ; RV64IA-WMO-NEXT: and a4, a4, a5 -; RV64IA-WMO-NEXT: xor a4, a2, a4 -; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-WMO-NEXT: xor a4, a3, a4 +; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a0) ; RV64IA-WMO-NEXT: bnez a4, .LBB18_1 ; RV64IA-WMO-NEXT: .LBB18_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-ZACAS-LABEL: cmpxchg_i16_seq_cst_acquire: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-ZACAS-NEXT: lui a4, 16 -; RV64IA-ZACAS-NEXT: addi a4, a4, -1 -; RV64IA-ZACAS-NEXT: sllw a5, a4, a0 -; RV64IA-ZACAS-NEXT: and a1, a1, a4 -; RV64IA-ZACAS-NEXT: and a2, a2, a4 -; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: lui a3, 16 +; RV64IA-ZACAS-NEXT: slli a4, a0, 3 +; RV64IA-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-ZACAS-NEXT: sllw a5, a3, a4 +; RV64IA-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-ZACAS-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w.aqrl a2, (a3) -; RV64IA-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-ZACAS-NEXT: and a4, a3, a5 ; RV64IA-ZACAS-NEXT: 
bne a4, a1, .LBB18_3 ; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 -; RV64IA-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-ZACAS-NEXT: xor a4, a3, a2 ; RV64IA-ZACAS-NEXT: and a4, a4, a5 -; RV64IA-ZACAS-NEXT: xor a4, a2, a4 -; RV64IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-ZACAS-NEXT: xor a4, a3, a4 +; RV64IA-ZACAS-NEXT: sc.w.rl a4, a4, (a0) ; RV64IA-ZACAS-NEXT: bnez a4, .LBB18_1 ; RV64IA-ZACAS-NEXT: .LBB18_3: ; RV64IA-ZACAS-NEXT: ret @@ -3658,24 +3658,24 @@ define void @cmpxchg_i16_seq_cst_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind ; ; RV64IA-TSO-LABEL: cmpxchg_i16_seq_cst_acquire: ; RV64IA-TSO: # %bb.0: -; RV64IA-TSO-NEXT: andi a3, a0, -4 -; RV64IA-TSO-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NEXT: lui a4, 16 -; RV64IA-TSO-NEXT: addi a4, a4, -1 -; RV64IA-TSO-NEXT: sllw a5, a4, a0 -; RV64IA-TSO-NEXT: and a1, a1, a4 -; RV64IA-TSO-NEXT: and a2, a2, a4 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: slli a4, a0, 3 +; RV64IA-TSO-NEXT: addi a3, a3, -1 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a5, a3, a4 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: and a2, a2, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w.aqrl a2, (a3) -; RV64IA-TSO-NEXT: and a4, a2, a5 +; RV64IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-TSO-NEXT: and a4, a3, a5 ; RV64IA-TSO-NEXT: bne a4, a1, .LBB18_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 -; RV64IA-TSO-NEXT: xor a4, a2, a0 +; RV64IA-TSO-NEXT: xor a4, a3, a2 ; RV64IA-TSO-NEXT: and a4, a4, a5 -; RV64IA-TSO-NEXT: xor a4, a2, a4 -; RV64IA-TSO-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-TSO-NEXT: xor a4, a3, a4 +; RV64IA-TSO-NEXT: sc.w.rl a4, a4, (a0) ; RV64IA-TSO-NEXT: bnez a4, .LBB18_1 ; RV64IA-TSO-NEXT: .LBB18_3: ; RV64IA-TSO-NEXT: ret @@ -3704,24 +3704,24 @@ define void @cmpxchg_i16_seq_cst_seq_cst(ptr %ptr, i16 
%cmp, i16 %val) nounwind ; ; RV32IA-LABEL: cmpxchg_i16_seq_cst_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a5, a4, a0 -; RV32IA-NEXT: and a1, a1, a4 -; RV32IA-NEXT: and a2, a2, a4 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: sll a0, a2, a0 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: slli a4, a0, 3 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: sll a5, a3, a4 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a3 +; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: sll a2, a2, a4 ; RV32IA-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a2, (a3) -; RV32IA-NEXT: and a4, a2, a5 +; RV32IA-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-NEXT: and a4, a3, a5 ; RV32IA-NEXT: bne a4, a1, .LBB19_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 -; RV32IA-NEXT: xor a4, a2, a0 +; RV32IA-NEXT: xor a4, a3, a2 ; RV32IA-NEXT: and a4, a4, a5 -; RV32IA-NEXT: xor a4, a2, a4 -; RV32IA-NEXT: sc.w.rl a4, a4, (a3) +; RV32IA-NEXT: xor a4, a3, a4 +; RV32IA-NEXT: sc.w.rl a4, a4, (a0) ; RV32IA-NEXT: bnez a4, .LBB19_1 ; RV32IA-NEXT: .LBB19_3: ; RV32IA-NEXT: ret @@ -3741,48 +3741,48 @@ define void @cmpxchg_i16_seq_cst_seq_cst(ptr %ptr, i16 %cmp, i16 %val) nounwind ; ; RV64IA-WMO-LABEL: cmpxchg_i16_seq_cst_seq_cst: ; RV64IA-WMO: # %bb.0: -; RV64IA-WMO-NEXT: andi a3, a0, -4 -; RV64IA-WMO-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NEXT: lui a4, 16 -; RV64IA-WMO-NEXT: addi a4, a4, -1 -; RV64IA-WMO-NEXT: sllw a5, a4, a0 -; RV64IA-WMO-NEXT: and a1, a1, a4 -; RV64IA-WMO-NEXT: and a2, a2, a4 -; RV64IA-WMO-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: slli a4, a0, 3 +; RV64IA-WMO-NEXT: addi a3, a3, -1 +; RV64IA-WMO-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NEXT: sllw a5, a3, a4 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: and a2, a2, a3 +; RV64IA-WMO-NEXT: sllw 
a1, a1, a4 +; RV64IA-WMO-NEXT: sllw a2, a2, a4 ; RV64IA-WMO-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NEXT: lr.w.aqrl a2, (a3) -; RV64IA-WMO-NEXT: and a4, a2, a5 +; RV64IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-WMO-NEXT: and a4, a3, a5 ; RV64IA-WMO-NEXT: bne a4, a1, .LBB19_3 ; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 -; RV64IA-WMO-NEXT: xor a4, a2, a0 +; RV64IA-WMO-NEXT: xor a4, a3, a2 ; RV64IA-WMO-NEXT: and a4, a4, a5 -; RV64IA-WMO-NEXT: xor a4, a2, a4 -; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-WMO-NEXT: xor a4, a3, a4 +; RV64IA-WMO-NEXT: sc.w.rl a4, a4, (a0) ; RV64IA-WMO-NEXT: bnez a4, .LBB19_1 ; RV64IA-WMO-NEXT: .LBB19_3: ; RV64IA-WMO-NEXT: ret ; ; RV64IA-ZACAS-LABEL: cmpxchg_i16_seq_cst_seq_cst: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a3, a0, -4 -; RV64IA-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-ZACAS-NEXT: lui a4, 16 -; RV64IA-ZACAS-NEXT: addi a4, a4, -1 -; RV64IA-ZACAS-NEXT: sllw a5, a4, a0 -; RV64IA-ZACAS-NEXT: and a1, a1, a4 -; RV64IA-ZACAS-NEXT: and a2, a2, a4 -; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: lui a3, 16 +; RV64IA-ZACAS-NEXT: slli a4, a0, 3 +; RV64IA-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-ZACAS-NEXT: sllw a5, a3, a4 +; RV64IA-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a4 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a4 ; RV64IA-ZACAS-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w.aqrl a2, (a3) -; RV64IA-ZACAS-NEXT: and a4, a2, a5 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-ZACAS-NEXT: and a4, a3, a5 ; RV64IA-ZACAS-NEXT: bne a4, a1, .LBB19_3 ; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 -; RV64IA-ZACAS-NEXT: xor a4, a2, a0 +; RV64IA-ZACAS-NEXT: xor a4, a3, a2 ; RV64IA-ZACAS-NEXT: and a4, a4, a5 -; RV64IA-ZACAS-NEXT: xor a4, a2, a4 -; RV64IA-ZACAS-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-ZACAS-NEXT: xor a4, 
a3, a4 +; RV64IA-ZACAS-NEXT: sc.w.rl a4, a4, (a0) ; RV64IA-ZACAS-NEXT: bnez a4, .LBB19_1 ; RV64IA-ZACAS-NEXT: .LBB19_3: ; RV64IA-ZACAS-NEXT: ret @@ -3795,24 +3795,24 @@ define void @cmpxchg_i16_seq_cst_seq_cst(ptr %ptr, i16 %cmp, i16 %val) nounwind ; ; RV64IA-TSO-LABEL: cmpxchg_i16_seq_cst_seq_cst: ; RV64IA-TSO: # %bb.0: -; RV64IA-TSO-NEXT: andi a3, a0, -4 -; RV64IA-TSO-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NEXT: lui a4, 16 -; RV64IA-TSO-NEXT: addi a4, a4, -1 -; RV64IA-TSO-NEXT: sllw a5, a4, a0 -; RV64IA-TSO-NEXT: and a1, a1, a4 -; RV64IA-TSO-NEXT: and a2, a2, a4 -; RV64IA-TSO-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: slli a4, a0, 3 +; RV64IA-TSO-NEXT: addi a3, a3, -1 +; RV64IA-TSO-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NEXT: sllw a5, a3, a4 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: and a2, a2, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a4 +; RV64IA-TSO-NEXT: sllw a2, a2, a4 ; RV64IA-TSO-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NEXT: lr.w.aqrl a2, (a3) -; RV64IA-TSO-NEXT: and a4, a2, a5 +; RV64IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-TSO-NEXT: and a4, a3, a5 ; RV64IA-TSO-NEXT: bne a4, a1, .LBB19_3 ; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 -; RV64IA-TSO-NEXT: xor a4, a2, a0 +; RV64IA-TSO-NEXT: xor a4, a3, a2 ; RV64IA-TSO-NEXT: and a4, a4, a5 -; RV64IA-TSO-NEXT: xor a4, a2, a4 -; RV64IA-TSO-NEXT: sc.w.rl a4, a4, (a3) +; RV64IA-TSO-NEXT: xor a4, a3, a4 +; RV64IA-TSO-NEXT: sc.w.rl a4, a4, (a0) ; RV64IA-TSO-NEXT: bnez a4, .LBB19_1 ; RV64IA-TSO-NEXT: .LBB19_3: ; RV64IA-TSO-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll index 26feb8325dec0..cdbd9d8da94e5 100644 --- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll @@ -1120,29 +1120,29 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_monotonic: ; RV32I-ZALRSC: # %bb.0: 
-; RV32I-ZALRSC-NEXT: andi a1, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a0, 3 ; RV32I-ZALRSC-NEXT: li a2, 255 -; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: sll a2, a2, a1 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 ; RV32I-ZALRSC-NEXT: not a2, a2 ; RV32I-ZALRSC-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a1) +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) ; RV32I-ZALRSC-NEXT: and a4, a3, a2 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB5_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a1 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_0_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: -; RV32IA-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-NOZACAS-NEXT: slli a1, a0, 3 ; RV32IA-NOZACAS-NEXT: li a2, 255 -; RV32IA-NOZACAS-NEXT: sll a2, a2, a0 +; RV32IA-NOZACAS-NEXT: sll a2, a2, a1 +; RV32IA-NOZACAS-NEXT: andi a0, a0, -4 ; RV32IA-NOZACAS-NEXT: not a2, a2 -; RV32IA-NOZACAS-NEXT: amoand.w a1, a2, (a1) -; RV32IA-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-NOZACAS-NEXT: amoand.w a0, a2, (a0) +; RV32IA-NOZACAS-NEXT: srl a0, a0, a1 ; RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_0_i8_monotonic: @@ -1158,51 +1158,51 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a1, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a0, 3 ; RV64I-ZALRSC-NEXT: li a2, 255 -; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a1 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 ; RV64I-ZALRSC-NEXT: not a2, a2 ; RV64I-ZALRSC-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a1) +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) ; RV64I-ZALRSC-NEXT: and a4, a3, a2 -; RV64I-ZALRSC-NEXT: sc.w 
a4, a4, (a1) +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB5_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a1 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_0_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: -; RV64IA-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-NOZACAS-NEXT: slli a1, a0, 3 ; RV64IA-NOZACAS-NEXT: li a2, 255 -; RV64IA-NOZACAS-NEXT: sllw a2, a2, a0 +; RV64IA-NOZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-NOZACAS-NEXT: andi a0, a0, -4 ; RV64IA-NOZACAS-NEXT: not a2, a2 -; RV64IA-NOZACAS-NEXT: amoand.w a1, a2, (a1) -; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-NOZACAS-NEXT: amoand.w a0, a2, (a0) +; RV64IA-NOZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_xchg_0_i8_monotonic: ; RV32IA-ZACAS: # %bb.0: -; RV32IA-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: slli a1, a0, 3 ; RV32IA-ZACAS-NEXT: li a2, 255 -; RV32IA-ZACAS-NEXT: sll a2, a2, a0 +; RV32IA-ZACAS-NEXT: sll a2, a2, a1 +; RV32IA-ZACAS-NEXT: andi a0, a0, -4 ; RV32IA-ZACAS-NEXT: not a2, a2 -; RV32IA-ZACAS-NEXT: amoand.w a1, a2, (a1) -; RV32IA-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-ZACAS-NEXT: amoand.w a0, a2, (a0) +; RV32IA-ZACAS-NEXT: srl a0, a0, a1 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_xchg_0_i8_monotonic: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: slli a1, a0, 3 ; RV64IA-ZACAS-NEXT: li a2, 255 -; RV64IA-ZACAS-NEXT: sllw a2, a2, a0 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-ZACAS-NEXT: andi a0, a0, -4 ; RV64IA-ZACAS-NEXT: not a2, a2 -; RV64IA-ZACAS-NEXT: amoand.w a1, a2, (a1) -; RV64IA-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-ZACAS-NEXT: amoand.w a0, a2, (a0) +; RV64IA-ZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_monotonic: @@ -1242,40 
+1242,40 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_acquire: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a1, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a0, 3 ; RV32I-ZALRSC-NEXT: li a2, 255 -; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: sll a2, a2, a1 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 ; RV32I-ZALRSC-NEXT: not a2, a2 ; RV32I-ZALRSC-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV32I-ZALRSC-NEXT: and a4, a3, a2 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB6_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a1 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: slli a1, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: li a2, 255 -; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a1 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 ; RV32IA-WMO-NOZACAS-NEXT: not a2, a2 -; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aq a1, a2, (a1) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aq a0, a2, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a1 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acquire: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: slli a1, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: li a2, 255 -; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a1 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 ; RV32IA-TSO-NOZACAS-NEXT: not a2, a2 -; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1) -; 
RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: amoand.w a0, a2, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a1 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_0_i8_acquire: @@ -1291,84 +1291,84 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_acquire: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a1, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a0, 3 ; RV64I-ZALRSC-NEXT: li a2, 255 -; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a1 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 ; RV64I-ZALRSC-NEXT: not a2, a2 ; RV64I-ZALRSC-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV64I-ZALRSC-NEXT: and a4, a3, a2 -; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB6_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a1 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: slli a1, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: li a2, 255 -; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 ; RV64IA-WMO-NOZACAS-NEXT: not a2, a2 -; RV64IA-WMO-NOZACAS-NEXT: amoand.w.aq a1, a2, (a1) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: amoand.w.aq a0, a2, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acquire: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: slli a1, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: li a2, 255 
-; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 ; RV64IA-TSO-NOZACAS-NEXT: not a2, a2 -; RV64IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: amoand.w a0, a2, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acquire: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a1, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: li a2, 255 -; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a1 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: not a2, a2 -; RV32IA-WMO-ZACAS-NEXT: amoand.w.aq a1, a2, (a1) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: amoand.w.aq a0, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a1 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a1, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: li a2, 255 -; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a1 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 ; RV32IA-TSO-ZACAS-NEXT: not a2, a2 -; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: amoand.w a0, a2, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a1 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acquire: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a1, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: li a2, 255 -; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 ; 
RV64IA-WMO-ZACAS-NEXT: not a2, a2 -; RV64IA-WMO-ZACAS-NEXT: amoand.w.aq a1, a2, (a1) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: amoand.w.aq a0, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: slli a1, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: li a2, 255 -; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: not a2, a2 -; RV64IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: amoand.w a0, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_acquire: @@ -1408,40 +1408,40 @@ define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_release: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a1, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a0, 3 ; RV32I-ZALRSC-NEXT: li a2, 255 -; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: sll a2, a2, a1 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 ; RV32I-ZALRSC-NEXT: not a2, a2 ; RV32I-ZALRSC-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a1) +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) ; RV32I-ZALRSC-NEXT: and a4, a3, a2 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB7_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a1 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; 
RV32IA-WMO-NOZACAS-NEXT: slli a1, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: li a2, 255 -; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a1 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 ; RV32IA-WMO-NOZACAS-NEXT: not a2, a2 -; RV32IA-WMO-NOZACAS-NEXT: amoand.w.rl a1, a2, (a1) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: amoand.w.rl a0, a2, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a1 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_release: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: slli a1, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: li a2, 255 -; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a1 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 ; RV32IA-TSO-NOZACAS-NEXT: not a2, a2 -; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: amoand.w a0, a2, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a1 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_0_i8_release: @@ -1457,84 +1457,84 @@ define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_release: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a1, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a0, 3 ; RV64I-ZALRSC-NEXT: li a2, 255 -; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a1 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 ; RV64I-ZALRSC-NEXT: not a2, a2 ; RV64I-ZALRSC-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a1) +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) ; RV64I-ZALRSC-NEXT: and a4, a3, a2 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB7_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: 
srlw a0, a3, a1 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: slli a1, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: li a2, 255 -; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 ; RV64IA-WMO-NOZACAS-NEXT: not a2, a2 -; RV64IA-WMO-NOZACAS-NEXT: amoand.w.rl a1, a2, (a1) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: amoand.w.rl a0, a2, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_release: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: slli a1, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: li a2, 255 -; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 ; RV64IA-TSO-NOZACAS-NEXT: not a2, a2 -; RV64IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: amoand.w a0, a2, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_release: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a1, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: li a2, 255 -; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a1 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: not a2, a2 -; RV32IA-WMO-ZACAS-NEXT: amoand.w.rl a1, a2, (a1) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: amoand.w.rl a0, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a1 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: 
atomicrmw_xchg_0_i8_release: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a1, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: li a2, 255 -; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a1 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 ; RV32IA-TSO-ZACAS-NEXT: not a2, a2 -; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: amoand.w a0, a2, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a1 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_release: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a1, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: li a2, 255 -; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 ; RV64IA-WMO-ZACAS-NEXT: not a2, a2 -; RV64IA-WMO-ZACAS-NEXT: amoand.w.rl a1, a2, (a1) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: amoand.w.rl a0, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i8_release: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: slli a1, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: li a2, 255 -; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: not a2, a2 -; RV64IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: amoand.w a0, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_release: @@ -1574,40 +1574,40 @@ define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind { ; ; RV32I-ZALRSC-LABEL: 
atomicrmw_xchg_0_i8_acq_rel: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a1, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a0, 3 ; RV32I-ZALRSC-NEXT: li a2, 255 -; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: sll a2, a2, a1 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 ; RV32I-ZALRSC-NEXT: not a2, a2 ; RV32I-ZALRSC-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV32I-ZALRSC-NEXT: and a4, a3, a2 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB8_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a1 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: slli a1, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: li a2, 255 -; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a1 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 ; RV32IA-WMO-NOZACAS-NEXT: not a2, a2 -; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a2, (a1) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a0, a2, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a1 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: slli a1, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: li a2, 255 -; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a1 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 ; RV32IA-TSO-NOZACAS-NEXT: not a2, a2 -; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: amoand.w a0, a2, (a0) +; 
RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a1 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_0_i8_acq_rel: @@ -1623,84 +1623,84 @@ define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_acq_rel: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a1, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a0, 3 ; RV64I-ZALRSC-NEXT: li a2, 255 -; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a1 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 ; RV64I-ZALRSC-NEXT: not a2, a2 ; RV64I-ZALRSC-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV64I-ZALRSC-NEXT: and a4, a3, a2 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB8_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a1 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: slli a1, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: li a2, 255 -; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 ; RV64IA-WMO-NOZACAS-NEXT: not a2, a2 -; RV64IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a2, (a1) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a0, a2, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: slli a1, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: li a2, 255 -; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, 
a2, a1 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 ; RV64IA-TSO-NOZACAS-NEXT: not a2, a2 -; RV64IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: amoand.w a0, a2, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a1, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: li a2, 255 -; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a1 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: not a2, a2 -; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a2, (a1) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a0, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a1 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a1, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: li a2, 255 -; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a1 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 ; RV32IA-TSO-ZACAS-NEXT: not a2, a2 -; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: amoand.w a0, a2, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a1 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a1, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: li a2, 255 -; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 ; RV64IA-WMO-ZACAS-NEXT: not a2, a2 -; RV64IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a2, 
(a1) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: amoand.w.aqrl a0, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: slli a1, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: li a2, 255 -; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: not a2, a2 -; RV64IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: amoand.w a0, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_acq_rel: @@ -1740,40 +1740,40 @@ define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_seq_cst: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a1, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a0, 3 ; RV32I-ZALRSC-NEXT: li a2, 255 -; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: sll a2, a2, a1 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 ; RV32I-ZALRSC-NEXT: not a2, a2 ; RV32I-ZALRSC-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a0) ; RV32I-ZALRSC-NEXT: and a4, a3, a2 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB9_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a1 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: slli a1, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: li a2, 255 -; 
RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a1 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 ; RV32IA-WMO-NOZACAS-NEXT: not a2, a2 -; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a2, (a1) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a0, a2, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a1 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: slli a1, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: li a2, 255 -; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a1 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 ; RV32IA-TSO-NOZACAS-NEXT: not a2, a2 -; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: amoand.w a0, a2, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a1 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_0_i8_seq_cst: @@ -1789,84 +1789,84 @@ define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i8_seq_cst: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a1, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a0, 3 ; RV64I-ZALRSC-NEXT: li a2, 255 -; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a1 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 ; RV64I-ZALRSC-NEXT: not a2, a2 ; RV64I-ZALRSC-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a0) ; RV64I-ZALRSC-NEXT: and a4, a3, a2 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB9_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a1 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: 
atomicrmw_xchg_0_i8_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: slli a1, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: li a2, 255 -; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 ; RV64IA-WMO-NOZACAS-NEXT: not a2, a2 -; RV64IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a2, (a1) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a0, a2, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: slli a1, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: li a2, 255 -; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 ; RV64IA-TSO-NOZACAS-NEXT: not a2, a2 -; RV64IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: amoand.w a0, a2, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a1, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: li a2, 255 -; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a1 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: not a2, a2 -; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a2, (a1) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a0, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a1 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst: ; RV32IA-TSO-ZACAS: # %bb.0: -; 
RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a1, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: li a2, 255 -; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a1 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 ; RV32IA-TSO-ZACAS-NEXT: not a2, a2 -; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: amoand.w a0, a2, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a1 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a1, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: li a2, 255 -; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 ; RV64IA-WMO-ZACAS-NEXT: not a2, a2 -; RV64IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a2, (a1) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: amoand.w.aqrl a0, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: slli a1, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: li a2, 255 -; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: not a2, a2 -; RV64IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: amoand.w a0, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_seq_cst: @@ -1906,27 +1906,27 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_monotonic: ; RV32I-ZALRSC: # 
%bb.0: -; RV32I-ZALRSC-NEXT: andi a1, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a0, 3 ; RV32I-ZALRSC-NEXT: li a2, 255 -; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a2, a2, a1 ; RV32I-ZALRSC-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a1) +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) ; RV32I-ZALRSC-NEXT: or a4, a3, a2 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB10_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a1 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: -; RV32IA-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-NOZACAS-NEXT: slli a1, a0, 3 ; RV32IA-NOZACAS-NEXT: li a2, 255 -; RV32IA-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-NOZACAS-NEXT: amoor.w a1, a2, (a1) -; RV32IA-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-NOZACAS-NEXT: sll a2, a2, a1 +; RV32IA-NOZACAS-NEXT: amoor.w a0, a2, (a0) +; RV32IA-NOZACAS-NEXT: srl a0, a0, a1 ; RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_minus_1_i8_monotonic: @@ -1942,47 +1942,47 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a1, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a0, 3 ; RV64I-ZALRSC-NEXT: li a2, 255 -; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a1 ; RV64I-ZALRSC-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a1) +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) ; RV64I-ZALRSC-NEXT: or a4, a3, a2 -; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; 
RV64I-ZALRSC-NEXT: bnez a4, .LBB10_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a1 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: -; RV64IA-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-NOZACAS-NEXT: slli a1, a0, 3 ; RV64IA-NOZACAS-NEXT: li a2, 255 -; RV64IA-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-NOZACAS-NEXT: amoor.w a1, a2, (a1) -; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-NOZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-NOZACAS-NEXT: amoor.w a0, a2, (a0) +; RV64IA-NOZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_monotonic: ; RV32IA-ZACAS: # %bb.0: -; RV32IA-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: slli a1, a0, 3 ; RV32IA-ZACAS-NEXT: li a2, 255 -; RV32IA-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-ZACAS-NEXT: amoor.w a1, a2, (a1) -; RV32IA-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-ZACAS-NEXT: sll a2, a2, a1 +; RV32IA-ZACAS-NEXT: amoor.w a0, a2, (a0) +; RV32IA-ZACAS-NEXT: srl a0, a0, a1 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_monotonic: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: slli a1, a0, 3 ; RV64IA-ZACAS-NEXT: li a2, 255 -; RV64IA-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-ZACAS-NEXT: amoor.w a1, a2, (a1) -; RV64IA-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-ZACAS-NEXT: amoor.w a0, a2, (a0) +; RV64IA-ZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_monotonic: @@ -2026,37 +2026,37 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_acquire: ; 
RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a1, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a0, 3 ; RV32I-ZALRSC-NEXT: li a2, 255 -; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a2, a2, a1 ; RV32I-ZALRSC-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV32I-ZALRSC-NEXT: or a4, a3, a2 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB11_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a1 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: slli a1, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: li a2, 255 -; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aq a1, a2, (a1) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a1 +; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aq a0, a2, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a1 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: slli a1, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: li a2, 255 -; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a1 +; RV32IA-TSO-NOZACAS-NEXT: amoor.w a0, a2, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a1 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_minus_1_i8_acquire: @@ -2072,77 
+2072,77 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_acquire: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a1, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a0, 3 ; RV64I-ZALRSC-NEXT: li a2, 255 -; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a1 ; RV64I-ZALRSC-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV64I-ZALRSC-NEXT: or a4, a3, a2 -; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB11_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a1 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: slli a1, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: li a2, 255 -; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-WMO-NOZACAS-NEXT: amoor.w.aq a1, a2, (a1) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-WMO-NOZACAS-NEXT: amoor.w.aq a0, a2, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: slli a1, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: li a2, 255 -; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-TSO-NOZACAS-NEXT: amoor.w a0, a2, 
(a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a1, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: li a2, 255 -; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-WMO-ZACAS-NEXT: amoor.w.aq a1, a2, (a1) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a1 +; RV32IA-WMO-ZACAS-NEXT: amoor.w.aq a0, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a1 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a1, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: li a2, 255 -; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a1 +; RV32IA-TSO-ZACAS-NEXT: amoor.w a0, a2, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a1 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a1, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: li a2, 255 -; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-WMO-ZACAS-NEXT: amoor.w.aq a1, a2, (a1) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-WMO-ZACAS-NEXT: amoor.w.aq a0, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: 
slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: slli a1, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: li a2, 255 -; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-TSO-ZACAS-NEXT: amoor.w a0, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_acquire: @@ -2186,37 +2186,37 @@ define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_release: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a1, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a0, 3 ; RV32I-ZALRSC-NEXT: li a2, 255 -; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a2, a2, a1 ; RV32I-ZALRSC-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a1) +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) ; RV32I-ZALRSC-NEXT: or a4, a3, a2 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB12_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a1 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: slli a1, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: li a2, 255 -; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-WMO-NOZACAS-NEXT: amoor.w.rl a1, a2, (a1) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a1 +; RV32IA-WMO-NOZACAS-NEXT: amoor.w.rl a0, a2, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a1 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: 
atomicrmw_xchg_minus_1_i8_release: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: slli a1, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: li a2, 255 -; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a1 +; RV32IA-TSO-NOZACAS-NEXT: amoor.w a0, a2, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a1 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_minus_1_i8_release: @@ -2232,77 +2232,77 @@ define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_release: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a1, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a0, 3 ; RV64I-ZALRSC-NEXT: li a2, 255 -; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a1 ; RV64I-ZALRSC-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a1) +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) ; RV64I-ZALRSC-NEXT: or a4, a3, a2 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB12_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a1 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: slli a1, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: li a2, 255 -; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-WMO-NOZACAS-NEXT: amoor.w.rl a1, a2, (a1) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a1 +; 
RV64IA-WMO-NOZACAS-NEXT: amoor.w.rl a0, a2, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: slli a1, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: li a2, 255 -; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-TSO-NOZACAS-NEXT: amoor.w a0, a2, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a1, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: li a2, 255 -; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-WMO-ZACAS-NEXT: amoor.w.rl a1, a2, (a1) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a1 +; RV32IA-WMO-ZACAS-NEXT: amoor.w.rl a0, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a1 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a1, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: li a2, 255 -; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a1 +; RV32IA-TSO-ZACAS-NEXT: amoor.w a0, a2, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a1 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release: ; RV64IA-WMO-ZACAS: # %bb.0: -; 
RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a1, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: li a2, 255 -; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-WMO-ZACAS-NEXT: amoor.w.rl a1, a2, (a1) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-WMO-ZACAS-NEXT: amoor.w.rl a0, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: slli a1, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: li a2, 255 -; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-TSO-ZACAS-NEXT: amoor.w a0, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_release: @@ -2346,37 +2346,37 @@ define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a1, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a0, 3 ; RV32I-ZALRSC-NEXT: li a2, 255 -; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a2, a2, a1 ; RV32I-ZALRSC-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV32I-ZALRSC-NEXT: or a4, a3, a2 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB13_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a1 ; 
RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: slli a1, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: li a2, 255 -; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a2, (a1) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a1 +; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a0, a2, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a1 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: slli a1, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: li a2, 255 -; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a1 +; RV32IA-TSO-NOZACAS-NEXT: amoor.w a0, a2, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a1 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: @@ -2392,77 +2392,77 @@ define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a1, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a0, 3 ; RV64I-ZALRSC-NEXT: li a2, 255 -; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a1 ; RV64I-ZALRSC-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV64I-ZALRSC-NEXT: or a4, a3, a2 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) 
; RV64I-ZALRSC-NEXT: bnez a4, .LBB13_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a1 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: slli a1, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: li a2, 255 -; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a2, (a1) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a0, a2, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: slli a1, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: li a2, 255 -; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-TSO-NOZACAS-NEXT: amoor.w a0, a2, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a1, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: li a2, 255 -; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a2, (a1) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a1 +; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a0, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a1 ; RV32IA-WMO-ZACAS-NEXT: ret ; 
; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a1, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: li a2, 255 -; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a1 +; RV32IA-TSO-ZACAS-NEXT: amoor.w a0, a2, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a1 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a1, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: li a2, 255 -; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a2, (a1) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-WMO-ZACAS-NEXT: amoor.w.aqrl a0, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: slli a1, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: li a2, 255 -; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-TSO-ZACAS-NEXT: amoor.w a0, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: @@ -2506,37 +2506,37 @@ define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: ; RV32I-ZALRSC: # %bb.0: -; 
RV32I-ZALRSC-NEXT: andi a1, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a1, a0, 3 ; RV32I-ZALRSC-NEXT: li a2, 255 -; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a2, a2, a1 ; RV32I-ZALRSC-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a0) ; RV32I-ZALRSC-NEXT: or a4, a3, a2 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB14_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a1 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: slli a1, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: li a2, 255 -; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a2, (a1) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a1 +; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a0, a2, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a1 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: slli a1, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: li a2, 255 -; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a1 +; RV32IA-TSO-NOZACAS-NEXT: amoor.w a0, a2, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a1 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: @@ -2552,77 +2552,77 @@ 
define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a1, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a1, a0, 3 ; RV64I-ZALRSC-NEXT: li a2, 255 -; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a1 ; RV64I-ZALRSC-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a0) ; RV64I-ZALRSC-NEXT: or a4, a3, a2 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB14_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a1 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: slli a1, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: li a2, 255 -; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a2, (a1) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a0, a2, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: slli a1, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: li a2, 255 -; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-TSO-NOZACAS-NEXT: amoor.w a0, 
a2, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a1, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: li a2, 255 -; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a2, (a1) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a1 +; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a0, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a1 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a1, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: li a2, 255 -; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a1 +; RV32IA-TSO-ZACAS-NEXT: amoor.w a0, a2, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a1 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a1, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: li a2, 255 -; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a2, (a1) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-WMO-ZACAS-NEXT: amoor.w.aqrl a0, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; 
RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: slli a1, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: li a2, 255 -; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a1 +; RV64IA-TSO-ZACAS-NEXT: amoor.w a0, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a1 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: @@ -4805,35 +4805,35 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: sll a3, a3, a2 ; RV32I-ZALRSC-NEXT: zext.b a1, a1 -; RV32I-ZALRSC-NEXT: sll a3, a3, a0 ; RV32I-ZALRSC-NEXT: not a3, a3 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 ; RV32I-ZALRSC-NEXT: or a1, a1, a3 ; RV32I-ZALRSC-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) ; RV32I-ZALRSC-NEXT: and a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB25_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: atomicrmw_and_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: -; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-NOZACAS-NEXT: li a3, 255 +; RV32IA-NOZACAS-NEXT: sll a3, a3, a2 ; RV32IA-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-NOZACAS-NEXT: sll a3, a3, a0 ; RV32IA-NOZACAS-NEXT: not a3, a3 -; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 +; RV32IA-NOZACAS-NEXT: sll a1, a1, a2 +; 
RV32IA-NOZACAS-NEXT: andi a0, a0, -4 ; RV32IA-NOZACAS-NEXT: or a1, a1, a3 -; RV32IA-NOZACAS-NEXT: amoand.w a1, a1, (a2) -; RV32IA-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_and_i8_monotonic: @@ -4848,63 +4848,63 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a2 ; RV64I-ZALRSC-NEXT: zext.b a1, a1 -; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 ; RV64I-ZALRSC-NEXT: not a3, a3 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 ; RV64I-ZALRSC-NEXT: or a1, a1, a3 ; RV64I-ZALRSC-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) ; RV64I-ZALRSC-NEXT: and a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB25_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_and_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: -; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-NOZACAS-NEXT: li a3, 255 +; RV64IA-NOZACAS-NEXT: sllw a3, a3, a2 ; RV64IA-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-NOZACAS-NEXT: not a3, a3 -; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-NOZACAS-NEXT: andi a0, a0, -4 ; RV64IA-NOZACAS-NEXT: or a1, a1, a3 -; RV64IA-NOZACAS-NEXT: amoand.w a1, a1, (a2) -; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-NOZACAS-NEXT: 
amoand.w a0, a1, (a0) +; RV64IA-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_and_i8_monotonic: ; RV32IA-ZACAS: # %bb.0: -; RV32IA-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-ZACAS-NEXT: li a3, 255 +; RV32IA-ZACAS-NEXT: sll a3, a3, a2 ; RV32IA-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-ZACAS-NEXT: not a3, a3 -; RV32IA-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-ZACAS-NEXT: andi a0, a0, -4 ; RV32IA-ZACAS-NEXT: or a1, a1, a3 -; RV32IA-ZACAS-NEXT: amoand.w a1, a1, (a2) -; RV32IA-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_and_i8_monotonic: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-ZACAS-NEXT: li a3, 255 +; RV64IA-ZACAS-NEXT: sllw a3, a3, a2 ; RV64IA-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-ZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-ZACAS-NEXT: not a3, a3 -; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-ZACAS-NEXT: andi a0, a0, -4 ; RV64IA-ZACAS-NEXT: or a1, a1, a3 -; RV64IA-ZACAS-NEXT: amoand.w a1, a1, (a2) -; RV64IA-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_monotonic: @@ -4943,49 +4943,49 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_acquire: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: sll a3, a3, a2 ; RV32I-ZALRSC-NEXT: zext.b a1, a1 -; RV32I-ZALRSC-NEXT: sll a3, a3, a0 ; RV32I-ZALRSC-NEXT: not a3, a3 -; 
RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 ; RV32I-ZALRSC-NEXT: or a1, a1, a3 ; RV32I-ZALRSC-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV32I-ZALRSC-NEXT: and a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB26_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: li a3, 255 +; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a2 ; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0 ; RV32IA-WMO-NOZACAS-NEXT: not a3, a3 -; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 ; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a3 -; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aq a1, a1, (a2) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aq a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i8_acquire: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: li a3, 255 +; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a2 ; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0 ; RV32IA-TSO-NOZACAS-NEXT: not a3, a3 -; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 ; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a3 -; 
RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_and_i8_acquire: @@ -5000,105 +5000,105 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_acquire: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a2 ; RV64I-ZALRSC-NEXT: zext.b a1, a1 -; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 ; RV64I-ZALRSC-NEXT: not a3, a3 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 ; RV64I-ZALRSC-NEXT: or a1, a1, a3 ; RV64I-ZALRSC-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV64I-ZALRSC-NEXT: and a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB26_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: li a3, 255 +; RV64IA-WMO-NOZACAS-NEXT: sllw a3, a3, a2 ; RV64IA-WMO-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-WMO-NOZACAS-NEXT: not a3, a3 -; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 ; RV64IA-WMO-NOZACAS-NEXT: or a1, a1, a3 -; RV64IA-WMO-NOZACAS-NEXT: amoand.w.aq a1, a1, (a2) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; 
RV64IA-WMO-NOZACAS-NEXT: amoand.w.aq a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_and_i8_acquire: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: li a3, 255 +; RV64IA-TSO-NOZACAS-NEXT: sllw a3, a3, a2 ; RV64IA-TSO-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-TSO-NOZACAS-NEXT: not a3, a3 -; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 ; RV64IA-TSO-NOZACAS-NEXT: or a1, a1, a3 -; RV64IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_acquire: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: li a3, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a2 ; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-WMO-ZACAS-NEXT: not a3, a3 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a3 -; RV32IA-WMO-ZACAS-NEXT: amoand.w.aq a1, a1, (a2) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: amoand.w.aq a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i8_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: li a3, 255 +; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a2 
; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-TSO-ZACAS-NEXT: not a3, a3 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 ; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a3 -; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_acquire: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: li a3, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a2 ; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-WMO-ZACAS-NEXT: not a3, a3 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 ; RV64IA-WMO-ZACAS-NEXT: or a1, a1, a3 -; RV64IA-WMO-ZACAS-NEXT: amoand.w.aq a1, a1, (a2) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: amoand.w.aq a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_and_i8_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: li a3, 255 +; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a2 ; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-TSO-ZACAS-NEXT: not a3, a3 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: or a1, a1, a3 -; RV64IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: 
srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_acquire: @@ -5137,49 +5137,49 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_release: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: sll a3, a3, a2 ; RV32I-ZALRSC-NEXT: zext.b a1, a1 -; RV32I-ZALRSC-NEXT: sll a3, a3, a0 ; RV32I-ZALRSC-NEXT: not a3, a3 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 ; RV32I-ZALRSC-NEXT: or a1, a1, a3 ; RV32I-ZALRSC-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) ; RV32I-ZALRSC-NEXT: and a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB27_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: li a3, 255 +; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a2 ; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0 ; RV32IA-WMO-NOZACAS-NEXT: not a3, a3 -; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 ; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a3 -; RV32IA-WMO-NOZACAS-NEXT: amoand.w.rl a1, a1, (a2) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: amoand.w.rl a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i8_release: ; 
RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: li a3, 255 +; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a2 ; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0 ; RV32IA-TSO-NOZACAS-NEXT: not a3, a3 -; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 ; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a3 -; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_and_i8_release: @@ -5194,105 +5194,105 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_release: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a2 ; RV64I-ZALRSC-NEXT: zext.b a1, a1 -; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 ; RV64I-ZALRSC-NEXT: not a3, a3 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 ; RV64I-ZALRSC-NEXT: or a1, a1, a3 ; RV64I-ZALRSC-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) ; RV64I-ZALRSC-NEXT: and a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB27_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; 
RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: li a3, 255 +; RV64IA-WMO-NOZACAS-NEXT: sllw a3, a3, a2 ; RV64IA-WMO-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-WMO-NOZACAS-NEXT: not a3, a3 -; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 ; RV64IA-WMO-NOZACAS-NEXT: or a1, a1, a3 -; RV64IA-WMO-NOZACAS-NEXT: amoand.w.rl a1, a1, (a2) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: amoand.w.rl a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_and_i8_release: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: li a3, 255 +; RV64IA-TSO-NOZACAS-NEXT: sllw a3, a3, a2 ; RV64IA-TSO-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-TSO-NOZACAS-NEXT: not a3, a3 -; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 ; RV64IA-TSO-NOZACAS-NEXT: or a1, a1, a3 -; RV64IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_release: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: li a3, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a2 ; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-WMO-ZACAS-NEXT: not a3, a3 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 ; 
RV32IA-WMO-ZACAS-NEXT: or a1, a1, a3 -; RV32IA-WMO-ZACAS-NEXT: amoand.w.rl a1, a1, (a2) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: amoand.w.rl a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i8_release: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: li a3, 255 +; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a2 ; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-TSO-ZACAS-NEXT: not a3, a3 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 ; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a3 -; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_release: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: li a3, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a2 ; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-WMO-ZACAS-NEXT: not a3, a3 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 ; RV64IA-WMO-ZACAS-NEXT: or a1, a1, a3 -; RV64IA-WMO-ZACAS-NEXT: amoand.w.rl a1, a1, (a2) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: amoand.w.rl a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_and_i8_release: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: slli 
a2, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: li a3, 255 +; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a2 ; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-TSO-ZACAS-NEXT: not a3, a3 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: or a1, a1, a3 -; RV64IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_release: @@ -5331,49 +5331,49 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_acq_rel: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: sll a3, a3, a2 ; RV32I-ZALRSC-NEXT: zext.b a1, a1 -; RV32I-ZALRSC-NEXT: sll a3, a3, a0 ; RV32I-ZALRSC-NEXT: not a3, a3 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 ; RV32I-ZALRSC-NEXT: or a1, a1, a3 ; RV32I-ZALRSC-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV32I-ZALRSC-NEXT: and a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB28_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: li a3, 255 +; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a2 ; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1 
-; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0 ; RV32IA-WMO-NOZACAS-NEXT: not a3, a3 -; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 ; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a3 -; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a1, (a2) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i8_acq_rel: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: li a3, 255 +; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a2 ; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0 ; RV32IA-TSO-NOZACAS-NEXT: not a3, a3 -; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 ; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a3 -; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_and_i8_acq_rel: @@ -5388,105 +5388,105 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_acq_rel: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a2 ; RV64I-ZALRSC-NEXT: zext.b a1, a1 -; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 ; RV64I-ZALRSC-NEXT: not a3, a3 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 ; RV64I-ZALRSC-NEXT: or a1, a1, a3 ; RV64I-ZALRSC-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 -; 
RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV64I-ZALRSC-NEXT: and a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB28_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: li a3, 255 +; RV64IA-WMO-NOZACAS-NEXT: sllw a3, a3, a2 ; RV64IA-WMO-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-WMO-NOZACAS-NEXT: not a3, a3 -; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 ; RV64IA-WMO-NOZACAS-NEXT: or a1, a1, a3 -; RV64IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a1, (a2) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_and_i8_acq_rel: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: li a3, 255 +; RV64IA-TSO-NOZACAS-NEXT: sllw a3, a3, a2 ; RV64IA-TSO-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-TSO-NOZACAS-NEXT: not a3, a3 -; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 ; RV64IA-TSO-NOZACAS-NEXT: or a1, a1, a3 -; RV64IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; 
RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_acq_rel: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: li a3, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a2 ; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-WMO-ZACAS-NEXT: not a3, a3 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a3 -; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a1, (a2) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i8_acq_rel: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: li a3, 255 +; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a2 ; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-TSO-ZACAS-NEXT: not a3, a3 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 ; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a3 -; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_acq_rel: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: li a3, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a2 ; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-WMO-ZACAS-NEXT: not a3, a3 -; 
RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 ; RV64IA-WMO-ZACAS-NEXT: or a1, a1, a3 -; RV64IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a1, (a2) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_and_i8_acq_rel: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: li a3, 255 +; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a2 ; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-TSO-ZACAS-NEXT: not a3, a3 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: or a1, a1, a3 -; RV64IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_acq_rel: @@ -5525,49 +5525,49 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_seq_cst: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: sll a3, a3, a2 ; RV32I-ZALRSC-NEXT: zext.b a1, a1 -; RV32I-ZALRSC-NEXT: sll a3, a3, a0 ; RV32I-ZALRSC-NEXT: not a3, a3 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 ; RV32I-ZALRSC-NEXT: or a1, a1, a3 ; RV32I-ZALRSC-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a0) ; RV32I-ZALRSC-NEXT: and a4, a3, a1 -; 
RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB29_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: li a3, 255 +; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a2 ; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0 ; RV32IA-WMO-NOZACAS-NEXT: not a3, a3 -; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 ; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a3 -; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a1, (a2) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i8_seq_cst: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: li a3, 255 +; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a2 ; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0 ; RV32IA-TSO-NOZACAS-NEXT: not a3, a3 -; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 ; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a3 -; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_and_i8_seq_cst: @@ -5582,105 +5582,105 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) 
nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_seq_cst: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a2 ; RV64I-ZALRSC-NEXT: zext.b a1, a1 -; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 ; RV64I-ZALRSC-NEXT: not a3, a3 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 ; RV64I-ZALRSC-NEXT: or a1, a1, a3 ; RV64I-ZALRSC-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a0) ; RV64I-ZALRSC-NEXT: and a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB29_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: li a3, 255 +; RV64IA-WMO-NOZACAS-NEXT: sllw a3, a3, a2 ; RV64IA-WMO-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-WMO-NOZACAS-NEXT: not a3, a3 -; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 ; RV64IA-WMO-NOZACAS-NEXT: or a1, a1, a3 -; RV64IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a1, (a2) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_and_i8_seq_cst: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, 
a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: li a3, 255 +; RV64IA-TSO-NOZACAS-NEXT: sllw a3, a3, a2 ; RV64IA-TSO-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-TSO-NOZACAS-NEXT: not a3, a3 -; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 ; RV64IA-TSO-NOZACAS-NEXT: or a1, a1, a3 -; RV64IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_seq_cst: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: li a3, 255 +; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a2 ; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-WMO-ZACAS-NEXT: not a3, a3 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a3 -; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a1, (a2) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i8_seq_cst: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: li a3, 255 +; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a2 ; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-TSO-ZACAS-NEXT: not a3, a3 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 ; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a3 -; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2) -; 
RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_seq_cst: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: li a3, 255 +; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a2 ; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-WMO-ZACAS-NEXT: not a3, a3 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 ; RV64IA-WMO-ZACAS-NEXT: or a1, a1, a3 -; RV64IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a1, (a2) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_and_i8_seq_cst: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: li a3, 255 +; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a2 ; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-TSO-ZACAS-NEXT: not a3, a3 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: or a1, a1, a3 -; RV64IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_seq_cst: @@ -7372,8 +7372,8 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB34_1: # %atomicrmw.start ; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 ; 
RV32IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1 -; RV32IA-WMO-ZABHA-ZACAS-NEXT: fence rw, rw ; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3 +; RV32IA-WMO-ZABHA-ZACAS-NEXT: fence rw, rw ; RV32IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 24 ; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.b.aqrl a0, a3, (a2) ; RV32IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 24 @@ -7388,8 +7388,8 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB34_1: # %atomicrmw.start ; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1 -; RV32IA-TSO-ZABHA-ZACAS-NEXT: fence rw, rw ; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3 +; RV32IA-TSO-ZABHA-ZACAS-NEXT: fence rw, rw ; RV32IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 24 ; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.b a0, a3, (a2) ; RV32IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 24 @@ -7404,8 +7404,8 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64IA-WMO-ZABHA-ZACAS-NEXT: .LBB34_1: # %atomicrmw.start ; RV64IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1 -; RV64IA-WMO-ZABHA-ZACAS-NEXT: fence rw, rw ; RV64IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3 +; RV64IA-WMO-ZABHA-ZACAS-NEXT: fence rw, rw ; RV64IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 56 ; RV64IA-WMO-ZABHA-ZACAS-NEXT: amocas.b.aqrl a0, a3, (a2) ; RV64IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 56 @@ -7420,8 +7420,8 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64IA-TSO-ZABHA-ZACAS-NEXT: .LBB34_1: # %atomicrmw.start ; RV64IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1 -; RV64IA-TSO-ZABHA-ZACAS-NEXT: fence rw, rw ; RV64IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3 +; RV64IA-TSO-ZABHA-ZACAS-NEXT: fence rw, rw ; RV64IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 56 ; RV64IA-TSO-ZABHA-ZACAS-NEXT: amocas.b a0, a3, (a2) ; RV64IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 56 @@ -7445,27 +7445,27 @@ define i8 
@atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: zext.b a1, a1 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) ; RV32I-ZALRSC-NEXT: or a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB35_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: atomicrmw_or_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: -; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-NOZACAS-NEXT: amoor.w a1, a1, (a2) -; RV32IA-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_or_i8_monotonic: @@ -7480,47 +7480,47 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: zext.b a1, a1 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) ; RV64I-ZALRSC-NEXT: or a4, a3, a1 
-; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB35_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_or_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: -; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-NOZACAS-NEXT: amoor.w a1, a1, (a2) -; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_or_i8_monotonic: ; RV32IA-ZACAS: # %bb.0: -; RV32IA-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-ZACAS-NEXT: amoor.w a1, a1, (a2) -; RV32IA-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_or_i8_monotonic: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-ZACAS-NEXT: amoor.w a1, a1, (a2) -; RV64IA-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_monotonic: @@ -7559,37 +7559,37 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind { ; ; 
RV32I-ZALRSC-LABEL: atomicrmw_or_i8_acquire: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: zext.b a1, a1 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV32I-ZALRSC-NEXT: or a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB36_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aq a1, a1, (a2) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aq a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i8_acquire: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: 
atomicrmw_or_i8_acquire: @@ -7604,77 +7604,77 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_acquire: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: zext.b a1, a1 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV64I-ZALRSC-NEXT: or a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB36_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: amoor.w.aq a1, a1, (a2) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: amoor.w.aq a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_or_i8_acquire: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: 
amoor.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_acquire: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: amoor.w.aq a1, a1, (a2) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: amoor.w.aq a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i8_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_acquire: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: amoor.w.aq a1, a1, (a2) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: amoor.w.aq a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_or_i8_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 
+; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_acquire: @@ -7713,37 +7713,37 @@ define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_release: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: zext.b a1, a1 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) ; RV32I-ZALRSC-NEXT: or a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB37_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NOZACAS-NEXT: amoor.w.rl a1, a1, (a2) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: amoor.w.rl a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i8_release: ; RV32IA-TSO-NOZACAS: # 
%bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_or_i8_release: @@ -7758,77 +7758,77 @@ define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_release: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: zext.b a1, a1 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) ; RV64I-ZALRSC-NEXT: or a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB37_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: amoor.w.rl a1, a1, (a2) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: amoor.w.rl a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; 
RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_or_i8_release: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_release: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: amoor.w.rl a1, a1, (a2) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: amoor.w.rl a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i8_release: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_release: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; 
RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: amoor.w.rl a1, a1, (a2) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: amoor.w.rl a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_or_i8_release: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_release: @@ -7867,37 +7867,37 @@ define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_acq_rel: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: zext.b a1, a1 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV32I-ZALRSC-NEXT: or a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB38_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; 
RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a1, (a2) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i8_acq_rel: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_or_i8_acq_rel: @@ -7912,77 +7912,77 @@ define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_acq_rel: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: zext.b a1, a1 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV64I-ZALRSC-NEXT: or a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB38_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; 
RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a1, (a2) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_or_i8_acq_rel: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_acq_rel: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a1, (a2) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i8_acq_rel: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli 
a2, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_acq_rel: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a1, (a2) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_or_i8_acq_rel: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_acq_rel: @@ -8021,37 +8021,37 @@ define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_seq_cst: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: zext.b a1, a1 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: 
sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a0) ; RV32I-ZALRSC-NEXT: or a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB39_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a1, (a2) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i8_seq_cst: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_or_i8_seq_cst: @@ -8066,77 +8066,77 @@ define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_seq_cst: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; 
RV64I-ZALRSC-NEXT: zext.b a1, a1 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a0) ; RV64I-ZALRSC-NEXT: or a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB39_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a1, (a2) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_or_i8_seq_cst: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_seq_cst: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: 
slli a2, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a1, (a2) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i8_seq_cst: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_seq_cst: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a1, (a2) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_or_i8_seq_cst: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: 
sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_seq_cst: @@ -8175,27 +8175,27 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: zext.b a1, a1 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) ; RV32I-ZALRSC-NEXT: xor a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB40_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: atomicrmw_xor_i8_monotonic: ; RV32IA-NOZACAS: # %bb.0: -; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-NOZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV32IA-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-NOZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV32IA-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xor_i8_monotonic: @@ -8210,47 +8210,47 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: zext.b a1, a1 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; 
RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) ; RV64I-ZALRSC-NEXT: xor a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB40_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_xor_i8_monotonic: ; RV64IA-NOZACAS: # %bb.0: -; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-NOZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-NOZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV64IA-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_xor_i8_monotonic: ; RV32IA-ZACAS: # %bb.0: -; RV32IA-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-ZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV32IA-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-ZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV32IA-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_xor_i8_monotonic: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-ZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV64IA-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a2 +; 
RV64IA-ZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV64IA-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_monotonic: @@ -8289,37 +8289,37 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_acquire: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: zext.b a1, a1 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV32I-ZALRSC-NEXT: xor a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB41_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.aq a1, a1, (a2) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.aq a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i8_acquire: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV32IA-TSO-NOZACAS-NEXT: 
srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xor_i8_acquire: @@ -8334,77 +8334,77 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_acquire: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: zext.b a1, a1 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV64I-ZALRSC-NEXT: xor a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB41_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: amoxor.w.aq a1, a1, (a2) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: amoxor.w.aq a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i8_acquire: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: zext.b a1, a1 -; 
RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_acquire: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: amoxor.w.aq a1, a1, (a2) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: amoxor.w.aq a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i8_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_acquire: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: amoxor.w.aq a1, a1, (a2) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: 
amoxor.w.aq a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_xor_i8_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_acquire: @@ -8443,37 +8443,37 @@ define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_release: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: zext.b a1, a1 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) ; RV32I-ZALRSC-NEXT: xor a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB42_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_release: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.rl a1, a1, (a2) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; 
RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.rl a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i8_release: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xor_i8_release: @@ -8488,77 +8488,77 @@ define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_release: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: zext.b a1, a1 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) ; RV64I-ZALRSC-NEXT: xor a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB42_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_release: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-NOZACAS-NEXT: 
sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: amoxor.w.rl a1, a1, (a2) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: amoxor.w.rl a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i8_release: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_release: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: amoxor.w.rl a1, a1, (a2) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: amoxor.w.rl a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i8_release: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; 
RV32IA-TSO-ZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_release: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: amoxor.w.rl a1, a1, (a2) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: amoxor.w.rl a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_xor_i8_release: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_release: @@ -8597,37 +8597,37 @@ define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_acq_rel: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: zext.b a1, a1 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV32I-ZALRSC-NEXT: xor a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; 
RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB43_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.aqrl a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i8_acq_rel: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xor_i8_acq_rel: @@ -8642,77 +8642,77 @@ define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_acq_rel: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: zext.b a1, a1 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) 
+; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV64I-ZALRSC-NEXT: xor a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB43_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: amoxor.w.aqrl a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i8_acq_rel: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_acq_rel: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; 
RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: amoxor.w.aqrl a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i8_acq_rel: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_acq_rel: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: amoxor.w.aqrl a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_xor_i8_acq_rel: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_acq_rel: @@ -8751,37 +8751,37 @@ define i8 
@atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_seq_cst: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: zext.b a1, a1 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a0) ; RV32I-ZALRSC-NEXT: xor a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB44_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.aqrl a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i8_seq_cst: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: 
srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xor_i8_seq_cst: @@ -8796,77 +8796,77 @@ define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_seq_cst: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: zext.b a1, a1 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a0) ; RV64I-ZALRSC-NEXT: xor a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB44_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: amoxor.w.aqrl a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i8_seq_cst: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: andi 
a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_seq_cst: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: amoxor.w.aqrl a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i8_seq_cst: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_seq_cst: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: amoxor.w.aqrl a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_xor_i8_seq_cst: ; 
RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_seq_cst: @@ -8940,8 +8940,8 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-ZALRSC-NEXT: andi a2, a0, -4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: li a3, 255 -; RV32I-ZALRSC-NEXT: slli a1, a1, 24 ; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 ; RV32I-ZALRSC-NEXT: sll a3, a3, a0 ; RV32I-ZALRSC-NEXT: srai a1, a1, 24 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 @@ -8969,8 +8969,8 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-NOZACAS-NEXT: li a3, 255 -; RV32IA-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-NOZACAS-NEXT: andi a4, a0, 24 +; RV32IA-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-NOZACAS-NEXT: sll a3, a3, a0 ; RV32IA-NOZACAS-NEXT: srai a1, a1, 24 ; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 @@ -9040,8 +9040,8 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-ZALRSC-NEXT: andi a2, a0, -4 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: li a3, 255 -; RV64I-ZALRSC-NEXT: slli a1, a1, 56 ; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 ; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 ; RV64I-ZALRSC-NEXT: srai a1, a1, 56 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 @@ -9069,8 +9069,8 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-NOZACAS-NEXT: slli a0, a0, 
3 ; RV64IA-NOZACAS-NEXT: li a3, 255 -; RV64IA-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-NOZACAS-NEXT: andi a4, a0, 24 +; RV64IA-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-NOZACAS-NEXT: srai a1, a1, 56 ; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 @@ -9098,8 +9098,8 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-ZACAS-NEXT: li a3, 255 -; RV32IA-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-ZACAS-NEXT: andi a4, a0, 24 +; RV32IA-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-ZACAS-NEXT: srai a1, a1, 24 ; RV32IA-ZACAS-NEXT: sll a1, a1, a0 @@ -9127,8 +9127,8 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-ZACAS-NEXT: li a3, 255 -; RV64IA-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-ZACAS-NEXT: andi a4, a0, 24 +; RV64IA-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-ZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-ZACAS-NEXT: srai a1, a1, 56 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 @@ -9222,8 +9222,8 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-ZALRSC-NEXT: andi a2, a0, -4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: li a3, 255 -; RV32I-ZALRSC-NEXT: slli a1, a1, 24 ; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 ; RV32I-ZALRSC-NEXT: sll a3, a3, a0 ; RV32I-ZALRSC-NEXT: srai a1, a1, 24 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 @@ -9251,8 +9251,8 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: li a3, 255 -; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-WMO-NOZACAS-NEXT: andi a4, a0, 24 +; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0 ; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 24 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 @@ -9280,8 +9280,8 @@ 
define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: li a3, 255 -; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-TSO-NOZACAS-NEXT: andi a4, a0, 24 +; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0 ; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 24 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 @@ -9351,8 +9351,8 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-ZALRSC-NEXT: andi a2, a0, -4 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: li a3, 255 -; RV64I-ZALRSC-NEXT: slli a1, a1, 56 ; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 ; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 ; RV64I-ZALRSC-NEXT: srai a1, a1, 56 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 @@ -9380,8 +9380,8 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: li a3, 255 -; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-WMO-NOZACAS-NEXT: andi a4, a0, 24 +; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-WMO-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-WMO-NOZACAS-NEXT: srai a1, a1, 56 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 @@ -9409,8 +9409,8 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: li a3, 255 -; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-TSO-NOZACAS-NEXT: andi a4, a0, 24 +; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-TSO-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-TSO-NOZACAS-NEXT: srai a1, a1, 56 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 @@ -9438,8 +9438,8 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: li a3, 255 -; 
RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 24 +; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 24 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 @@ -9467,8 +9467,8 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: li a3, 255 -; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 24 +; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 24 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 @@ -9496,8 +9496,8 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: li a3, 255 -; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, 24 +; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-WMO-ZACAS-NEXT: srai a1, a1, 56 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 @@ -9525,8 +9525,8 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: li a3, 255 -; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, 24 +; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-TSO-ZACAS-NEXT: srai a1, a1, 56 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 @@ -9620,8 +9620,8 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-ZALRSC-NEXT: andi a2, a0, -4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: li a3, 255 -; RV32I-ZALRSC-NEXT: slli a1, a1, 24 ; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 ; RV32I-ZALRSC-NEXT: sll a3, a3, a0 ; RV32I-ZALRSC-NEXT: srai a1, a1, 24 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 
@@ -9649,8 +9649,8 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: li a3, 255 -; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-WMO-NOZACAS-NEXT: andi a4, a0, 24 +; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0 ; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 24 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 @@ -9678,8 +9678,8 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { ; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: li a3, 255 -; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-TSO-NOZACAS-NEXT: andi a4, a0, 24 +; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0 ; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 24 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 @@ -9749,8 +9749,8 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-ZALRSC-NEXT: andi a2, a0, -4 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: li a3, 255 -; RV64I-ZALRSC-NEXT: slli a1, a1, 56 ; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 ; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 ; RV64I-ZALRSC-NEXT: srai a1, a1, 56 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 @@ -9778,8 +9778,8 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: li a3, 255 -; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-WMO-NOZACAS-NEXT: andi a4, a0, 24 +; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-WMO-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-WMO-NOZACAS-NEXT: srai a1, a1, 56 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 @@ -9807,8 +9807,8 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { ; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; 
RV64IA-TSO-NOZACAS-NEXT: li a3, 255 -; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-TSO-NOZACAS-NEXT: andi a4, a0, 24 +; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-TSO-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-TSO-NOZACAS-NEXT: srai a1, a1, 56 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 @@ -9836,8 +9836,8 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { ; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: li a3, 255 -; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 24 +; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 24 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 @@ -9865,8 +9865,8 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { ; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: li a3, 255 -; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 24 +; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 24 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 @@ -9894,8 +9894,8 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { ; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: li a3, 255 -; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, 24 +; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-WMO-ZACAS-NEXT: srai a1, a1, 56 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 @@ -9923,8 +9923,8 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { ; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: li a3, 255 -; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, 24 +; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-TSO-ZACAS-NEXT: sllw a3, 
a3, a0 ; RV64IA-TSO-ZACAS-NEXT: srai a1, a1, 56 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 @@ -10018,8 +10018,8 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-ZALRSC-NEXT: andi a2, a0, -4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: li a3, 255 -; RV32I-ZALRSC-NEXT: slli a1, a1, 24 ; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 ; RV32I-ZALRSC-NEXT: sll a3, a3, a0 ; RV32I-ZALRSC-NEXT: srai a1, a1, 24 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 @@ -10047,8 +10047,8 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: li a3, 255 -; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-WMO-NOZACAS-NEXT: andi a4, a0, 24 +; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0 ; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 24 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 @@ -10076,8 +10076,8 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: li a3, 255 -; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-TSO-NOZACAS-NEXT: andi a4, a0, 24 +; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0 ; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 24 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 @@ -10147,8 +10147,8 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-ZALRSC-NEXT: andi a2, a0, -4 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: li a3, 255 -; RV64I-ZALRSC-NEXT: slli a1, a1, 56 ; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 ; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 ; RV64I-ZALRSC-NEXT: srai a1, a1, 56 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 @@ -10176,8 +10176,8 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 ; 
RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: li a3, 255 -; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-WMO-NOZACAS-NEXT: andi a4, a0, 24 +; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-WMO-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-WMO-NOZACAS-NEXT: srai a1, a1, 56 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 @@ -10205,8 +10205,8 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: li a3, 255 -; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-TSO-NOZACAS-NEXT: andi a4, a0, 24 +; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-TSO-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-TSO-NOZACAS-NEXT: srai a1, a1, 56 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 @@ -10234,8 +10234,8 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: li a3, 255 -; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 24 +; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 24 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 @@ -10263,8 +10263,8 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: li a3, 255 -; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 24 +; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 24 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 @@ -10292,8 +10292,8 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: li a3, 255 -; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, 24 +; 
RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-WMO-ZACAS-NEXT: srai a1, a1, 56 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 @@ -10321,8 +10321,8 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: li a3, 255 -; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, 24 +; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-TSO-ZACAS-NEXT: srai a1, a1, 56 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 @@ -10416,8 +10416,8 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-ZALRSC-NEXT: andi a2, a0, -4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: li a3, 255 -; RV32I-ZALRSC-NEXT: slli a1, a1, 24 ; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 ; RV32I-ZALRSC-NEXT: sll a3, a3, a0 ; RV32I-ZALRSC-NEXT: srai a1, a1, 24 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 @@ -10445,8 +10445,8 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-NOZACAS-NEXT: li a3, 255 -; RV32IA-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-NOZACAS-NEXT: andi a4, a0, 24 +; RV32IA-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-NOZACAS-NEXT: sll a3, a3, a0 ; RV32IA-NOZACAS-NEXT: srai a1, a1, 24 ; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 @@ -10516,8 +10516,8 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-ZALRSC-NEXT: andi a2, a0, -4 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: li a3, 255 -; RV64I-ZALRSC-NEXT: slli a1, a1, 56 ; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 ; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 ; RV64I-ZALRSC-NEXT: srai a1, a1, 56 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 @@ -10545,8 +10545,8 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 ; 
RV64IA-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-NOZACAS-NEXT: li a3, 255 -; RV64IA-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-NOZACAS-NEXT: andi a4, a0, 24 +; RV64IA-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-NOZACAS-NEXT: srai a1, a1, 56 ; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 @@ -10574,8 +10574,8 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32IA-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-ZACAS-NEXT: li a3, 255 -; RV32IA-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-ZACAS-NEXT: andi a4, a0, 24 +; RV32IA-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-ZACAS-NEXT: srai a1, a1, 24 ; RV32IA-ZACAS-NEXT: sll a1, a1, a0 @@ -10603,8 +10603,8 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64IA-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-ZACAS-NEXT: li a3, 255 -; RV64IA-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-ZACAS-NEXT: andi a4, a0, 24 +; RV64IA-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-ZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-ZACAS-NEXT: srai a1, a1, 56 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 @@ -10698,8 +10698,8 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-ZALRSC-NEXT: andi a2, a0, -4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: li a3, 255 -; RV32I-ZALRSC-NEXT: slli a1, a1, 24 ; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 ; RV32I-ZALRSC-NEXT: sll a3, a3, a0 ; RV32I-ZALRSC-NEXT: srai a1, a1, 24 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 @@ -10727,8 +10727,8 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-NOZACAS-NEXT: li a3, 255 -; RV32IA-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-NOZACAS-NEXT: andi a4, a0, 24 +; RV32IA-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-NOZACAS-NEXT: sll a3, a3, a0 ; RV32IA-NOZACAS-NEXT: srai a1, a1, 24 ; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 @@ -10798,8 
+10798,8 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-ZALRSC-NEXT: andi a2, a0, -4 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: li a3, 255 -; RV64I-ZALRSC-NEXT: slli a1, a1, 56 ; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 ; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 ; RV64I-ZALRSC-NEXT: srai a1, a1, 56 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 @@ -10827,8 +10827,8 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-NOZACAS-NEXT: li a3, 255 -; RV64IA-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-NOZACAS-NEXT: andi a4, a0, 24 +; RV64IA-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-NOZACAS-NEXT: srai a1, a1, 56 ; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 @@ -10856,8 +10856,8 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-ZACAS-NEXT: li a3, 255 -; RV32IA-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-ZACAS-NEXT: andi a4, a0, 24 +; RV32IA-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-ZACAS-NEXT: srai a1, a1, 24 ; RV32IA-ZACAS-NEXT: sll a1, a1, a0 @@ -10885,8 +10885,8 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-ZACAS-NEXT: li a3, 255 -; RV64IA-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-ZACAS-NEXT: andi a4, a0, 24 +; RV64IA-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-ZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-ZACAS-NEXT: srai a1, a1, 56 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 @@ -10980,8 +10980,8 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-ZALRSC-NEXT: andi a2, a0, -4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: li a3, 255 -; RV32I-ZALRSC-NEXT: slli a1, a1, 24 ; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 ; RV32I-ZALRSC-NEXT: 
sll a3, a3, a0 ; RV32I-ZALRSC-NEXT: srai a1, a1, 24 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 @@ -11009,8 +11009,8 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: li a3, 255 -; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-WMO-NOZACAS-NEXT: andi a4, a0, 24 +; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0 ; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 24 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 @@ -11038,8 +11038,8 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: li a3, 255 -; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-TSO-NOZACAS-NEXT: andi a4, a0, 24 +; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0 ; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 24 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 @@ -11109,8 +11109,8 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-ZALRSC-NEXT: andi a2, a0, -4 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: li a3, 255 -; RV64I-ZALRSC-NEXT: slli a1, a1, 56 ; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 ; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 ; RV64I-ZALRSC-NEXT: srai a1, a1, 56 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 @@ -11138,8 +11138,8 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: li a3, 255 -; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-WMO-NOZACAS-NEXT: andi a4, a0, 24 +; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-WMO-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-WMO-NOZACAS-NEXT: srai a1, a1, 56 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 @@ -11167,8 +11167,8 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; 
RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: li a3, 255 -; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-TSO-NOZACAS-NEXT: andi a4, a0, 24 +; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-TSO-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-TSO-NOZACAS-NEXT: srai a1, a1, 56 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 @@ -11196,8 +11196,8 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: li a3, 255 -; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 24 +; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 24 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 @@ -11225,8 +11225,8 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: li a3, 255 -; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 24 +; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 24 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 @@ -11254,8 +11254,8 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: li a3, 255 -; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, 24 +; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-WMO-ZACAS-NEXT: srai a1, a1, 56 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 @@ -11283,8 +11283,8 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: li a3, 255 -; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-TSO-ZACAS-NEXT: 
andi a4, a0, 24 +; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-TSO-ZACAS-NEXT: srai a1, a1, 56 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 @@ -11378,8 +11378,8 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-ZALRSC-NEXT: andi a2, a0, -4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: li a3, 255 -; RV32I-ZALRSC-NEXT: slli a1, a1, 24 ; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 ; RV32I-ZALRSC-NEXT: sll a3, a3, a0 ; RV32I-ZALRSC-NEXT: srai a1, a1, 24 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 @@ -11407,8 +11407,8 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: li a3, 255 -; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-WMO-NOZACAS-NEXT: andi a4, a0, 24 +; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0 ; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 24 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 @@ -11436,8 +11436,8 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: li a3, 255 -; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-TSO-NOZACAS-NEXT: andi a4, a0, 24 +; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0 ; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 24 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 @@ -11507,8 +11507,8 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-ZALRSC-NEXT: andi a2, a0, -4 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: li a3, 255 -; RV64I-ZALRSC-NEXT: slli a1, a1, 56 ; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 ; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 ; RV64I-ZALRSC-NEXT: srai a1, a1, 56 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 @@ -11536,8 +11536,8 @@ define i8 
@atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: li a3, 255 -; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-WMO-NOZACAS-NEXT: andi a4, a0, 24 +; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-WMO-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-WMO-NOZACAS-NEXT: srai a1, a1, 56 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 @@ -11565,8 +11565,8 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: li a3, 255 -; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-TSO-NOZACAS-NEXT: andi a4, a0, 24 +; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-TSO-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-TSO-NOZACAS-NEXT: srai a1, a1, 56 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 @@ -11594,8 +11594,8 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: li a3, 255 -; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 24 +; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 24 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 @@ -11623,8 +11623,8 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: li a3, 255 -; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 24 +; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 24 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 @@ -11652,8 +11652,8 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: li 
a3, 255 -; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, 24 +; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-WMO-ZACAS-NEXT: srai a1, a1, 56 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 @@ -11681,8 +11681,8 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: li a3, 255 -; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, 24 +; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-TSO-ZACAS-NEXT: srai a1, a1, 56 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 @@ -11776,8 +11776,8 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-ZALRSC-NEXT: andi a2, a0, -4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: li a3, 255 -; RV32I-ZALRSC-NEXT: slli a1, a1, 24 ; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 ; RV32I-ZALRSC-NEXT: sll a3, a3, a0 ; RV32I-ZALRSC-NEXT: srai a1, a1, 24 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 @@ -11805,8 +11805,8 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: li a3, 255 -; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-WMO-NOZACAS-NEXT: andi a4, a0, 24 +; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0 ; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 24 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 @@ -11834,8 +11834,8 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: li a3, 255 -; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-TSO-NOZACAS-NEXT: andi a4, a0, 24 +; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0 ; RV32IA-TSO-NOZACAS-NEXT: 
srai a1, a1, 24 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 @@ -11905,8 +11905,8 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-ZALRSC-NEXT: andi a2, a0, -4 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: li a3, 255 -; RV64I-ZALRSC-NEXT: slli a1, a1, 56 ; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 ; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 ; RV64I-ZALRSC-NEXT: srai a1, a1, 56 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 @@ -11934,8 +11934,8 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: li a3, 255 -; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-WMO-NOZACAS-NEXT: andi a4, a0, 24 +; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-WMO-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-WMO-NOZACAS-NEXT: srai a1, a1, 56 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 @@ -11963,8 +11963,8 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: li a3, 255 -; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-TSO-NOZACAS-NEXT: andi a4, a0, 24 +; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-TSO-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-TSO-NOZACAS-NEXT: srai a1, a1, 56 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 @@ -11992,8 +11992,8 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: li a3, 255 -; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 24 +; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 24 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 @@ -12021,8 +12021,8 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 ; 
RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: li a3, 255 -; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 24 +; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 24 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 @@ -12050,8 +12050,8 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: li a3, 255 -; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, 24 +; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-WMO-ZACAS-NEXT: srai a1, a1, 56 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 @@ -12079,8 +12079,8 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: li a3, 255 -; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, 24 +; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-TSO-ZACAS-NEXT: srai a1, a1, 56 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 @@ -12174,8 +12174,8 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-ZALRSC-NEXT: andi a2, a0, -4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: li a3, 255 -; RV32I-ZALRSC-NEXT: slli a1, a1, 24 ; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 ; RV32I-ZALRSC-NEXT: sll a3, a3, a0 ; RV32I-ZALRSC-NEXT: srai a1, a1, 24 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 @@ -12203,8 +12203,8 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-NOZACAS-NEXT: li a3, 255 -; RV32IA-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-NOZACAS-NEXT: andi a4, a0, 24 +; RV32IA-NOZACAS-NEXT: slli a1, a1, 24 ; RV32IA-NOZACAS-NEXT: sll a3, a3, a0 ; 
RV32IA-NOZACAS-NEXT: srai a1, a1, 24 ; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 @@ -12274,8 +12274,8 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-ZALRSC-NEXT: andi a2, a0, -4 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: li a3, 255 -; RV64I-ZALRSC-NEXT: slli a1, a1, 56 ; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 ; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 ; RV64I-ZALRSC-NEXT: srai a1, a1, 56 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 @@ -12303,8 +12303,8 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-NOZACAS-NEXT: li a3, 255 -; RV64IA-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-NOZACAS-NEXT: andi a4, a0, 24 +; RV64IA-NOZACAS-NEXT: slli a1, a1, 56 ; RV64IA-NOZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-NOZACAS-NEXT: srai a1, a1, 56 ; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 @@ -12332,8 +12332,8 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32IA-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-ZACAS-NEXT: li a3, 255 -; RV32IA-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-ZACAS-NEXT: andi a4, a0, 24 +; RV32IA-ZACAS-NEXT: slli a1, a1, 24 ; RV32IA-ZACAS-NEXT: sll a3, a3, a0 ; RV32IA-ZACAS-NEXT: srai a1, a1, 24 ; RV32IA-ZACAS-NEXT: sll a1, a1, a0 @@ -12361,8 +12361,8 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64IA-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-ZACAS-NEXT: li a3, 255 -; RV64IA-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-ZACAS-NEXT: andi a4, a0, 24 +; RV64IA-ZACAS-NEXT: slli a1, a1, 56 ; RV64IA-ZACAS-NEXT: sllw a3, a3, a0 ; RV64IA-ZACAS-NEXT: srai a1, a1, 56 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 @@ -15477,44 +15477,44 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; 
RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w a4, (a3) ; RV32I-ZALRSC-NEXT: mv a5, a1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB65_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: -; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-NOZACAS-NEXT: lui a2, 16 +; RV32IA-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-NOZACAS-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NOZACAS-NEXT: lr.w a3, (a2) +; RV32IA-NOZACAS-NEXT: lr.w a4, (a3) ; RV32IA-NOZACAS-NEXT: mv a5, a1 -; RV32IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-NOZACAS-NEXT: xor a5, a4, 
a5 +; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-NOZACAS-NEXT: bnez a5, .LBB65_1 ; RV32IA-NOZACAS-NEXT: # %bb.2: -; RV32IA-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_i16_monotonic: @@ -15529,86 +15529,86 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w a4, (a3) ; RV64I-ZALRSC-NEXT: mv a5, a1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB65_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: -; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-NOZACAS-NEXT: lui a2, 16 +; RV64IA-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-NOZACAS-NEXT: and a1, a1, a2 +; 
RV64IA-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-NOZACAS-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NOZACAS-NEXT: lr.w a3, (a2) +; RV64IA-NOZACAS-NEXT: lr.w a4, (a3) ; RV64IA-NOZACAS-NEXT: mv a5, a1 -; RV64IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-NOZACAS-NEXT: bnez a5, .LBB65_1 ; RV64IA-NOZACAS-NEXT: # %bb.2: -; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_xchg_i16_monotonic: ; RV32IA-ZACAS: # %bb.0: -; RV32IA-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-ZACAS-NEXT: lui a2, 16 +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-ZACAS-NEXT: lui a3, 16 -; RV32IA-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-ZACAS-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-ZACAS-NEXT: lr.w a3, (a2) +; RV32IA-ZACAS-NEXT: lr.w a4, (a3) ; RV32IA-ZACAS-NEXT: mv a5, a1 -; RV32IA-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-ZACAS-NEXT: bnez a5, .LBB65_1 ; RV32IA-ZACAS-NEXT: # %bb.2: -; RV32IA-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_xchg_i16_monotonic: ; RV64IA-ZACAS: # %bb.0: -; 
RV64IA-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-ZACAS-NEXT: lui a2, 16 +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-ZACAS-NEXT: lui a3, 16 -; RV64IA-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-ZACAS-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w a3, (a2) +; RV64IA-ZACAS-NEXT: lr.w a4, (a3) ; RV64IA-ZACAS-NEXT: mv a5, a1 -; RV64IA-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-ZACAS-NEXT: bnez a5, .LBB65_1 ; RV64IA-ZACAS-NEXT: # %bb.2: -; RV64IA-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_monotonic: @@ -15647,65 +15647,65 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_acquire: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a3) ; RV32I-ZALRSC-NEXT: mv a5, a1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; 
RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB66_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-NOZACAS-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2) +; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a3) ; RV32IA-WMO-NOZACAS-NEXT: mv a5, a1 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB66_1 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_i16_acquire: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 
; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-NOZACAS-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) ; RV32IA-TSO-NOZACAS-NEXT: mv a5, a1 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB66_1 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_i16_acquire: @@ -15720,149 +15720,149 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_acquire: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a3) ; RV64I-ZALRSC-NEXT: mv a5, a1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; 
RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB66_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NOZACAS-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2) +; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a3) ; RV64IA-WMO-NOZACAS-NEXT: mv a5, a1 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-WMO-NOZACAS-NEXT: bnez a5, .LBB66_1 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.2: -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_i16_acquire: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: addi a2, 
a2, -1 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NOZACAS-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) ; RV64IA-TSO-NOZACAS-NEXT: mv a5, a1 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-NOZACAS-NEXT: bnez a5, .LBB66_1 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.2: -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i16_acquire: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZACAS-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2) +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a3) ; RV32IA-WMO-ZACAS-NEXT: mv a5, a1 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; 
RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB66_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: -; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_i16_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZACAS-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2) +; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a3) ; RV32IA-TSO-ZACAS-NEXT: mv a5, a1 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB66_1 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: -; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i16_acquire: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 
-; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZACAS-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2) +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a3) ; RV64IA-WMO-ZACAS-NEXT: mv a5, a1 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB66_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_xchg_i16_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZACAS-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a2) +; RV64IA-TSO-ZACAS-NEXT: lr.w a4, (a3) ; RV64IA-TSO-ZACAS-NEXT: mv a5, a1 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-ZACAS-NEXT: 
xor a5, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB66_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_acquire: @@ -15901,65 +15901,65 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_release: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w a4, (a3) ; RV32I-ZALRSC-NEXT: mv a5, a1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB67_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0 -; 
RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-NOZACAS-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: lr.w a3, (a2) +; RV32IA-WMO-NOZACAS-NEXT: lr.w a4, (a3) ; RV32IA-WMO-NOZACAS-NEXT: mv a5, a1 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB67_1 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_i16_release: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-NOZACAS-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) ; RV32IA-TSO-NOZACAS-NEXT: mv a5, a1 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-NOZACAS-NEXT: 
xor a5, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB67_1 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_i16_release: @@ -15974,149 +15974,149 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_release: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w a4, (a3) ; RV64I-ZALRSC-NEXT: mv a5, a1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB67_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a4, a3, a0 -; 
RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NOZACAS-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: lr.w a3, (a2) +; RV64IA-WMO-NOZACAS-NEXT: lr.w a4, (a3) ; RV64IA-WMO-NOZACAS-NEXT: mv a5, a1 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-NOZACAS-NEXT: bnez a5, .LBB67_1 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.2: -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_i16_release: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NOZACAS-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) ; RV64IA-TSO-NOZACAS-NEXT: mv a5, a1 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; 
RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-NOZACAS-NEXT: bnez a5, .LBB67_1 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.2: -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i16_release: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZACAS-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w a3, (a2) +; RV32IA-WMO-ZACAS-NEXT: lr.w a4, (a3) ; RV32IA-WMO-ZACAS-NEXT: mv a5, a1 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB67_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: -; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_i16_release: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0 -; 
RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZACAS-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2) +; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a3) ; RV32IA-TSO-ZACAS-NEXT: mv a5, a1 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB67_1 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: -; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i16_release: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZACAS-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w a3, (a2) +; RV64IA-WMO-ZACAS-NEXT: lr.w a4, (a3) ; RV64IA-WMO-ZACAS-NEXT: mv a5, a1 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-ZACAS-NEXT: bnez 
a5, .LBB67_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_xchg_i16_release: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZACAS-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a2) +; RV64IA-TSO-ZACAS-NEXT: lr.w a4, (a3) ; RV64IA-TSO-ZACAS-NEXT: mv a5, a1 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB67_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_release: @@ -16155,65 +16155,65 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_acq_rel: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; 
RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a3) ; RV32I-ZALRSC-NEXT: mv a5, a1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB68_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-NOZACAS-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2) +; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a3) ; RV32IA-WMO-NOZACAS-NEXT: mv a5, a1 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB68_1 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: -; 
RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_i16_acq_rel: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-NOZACAS-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) +; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) ; RV32IA-TSO-NOZACAS-NEXT: mv a5, a1 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB68_1 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_i16_acq_rel: @@ -16228,149 +16228,149 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_acq_rel: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 
+; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a3) ; RV64I-ZALRSC-NEXT: mv a5, a1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB68_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NOZACAS-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2) +; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a3) ; RV64IA-WMO-NOZACAS-NEXT: mv a5, a1 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-NOZACAS-NEXT: bnez a5, .LBB68_1 ; RV64IA-WMO-NOZACAS-NEXT: # 
%bb.2: -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_i16_acq_rel: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NOZACAS-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) +; RV64IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) ; RV64IA-TSO-NOZACAS-NEXT: mv a5, a1 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-NOZACAS-NEXT: bnez a5, .LBB68_1 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.2: -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i16_acq_rel: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, 
a0 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZACAS-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2) +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a3) ; RV32IA-WMO-ZACAS-NEXT: mv a5, a1 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB68_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: -; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_i16_acq_rel: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZACAS-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2) +; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a3) ; RV32IA-TSO-ZACAS-NEXT: mv a5, a1 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB68_1 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: -; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: 
srl a0, a4, a0 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i16_acq_rel: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZACAS-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2) +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a3) ; RV64IA-WMO-ZACAS-NEXT: mv a5, a1 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB68_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_xchg_i16_acq_rel: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZACAS-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 -; 
RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a2) +; RV64IA-TSO-ZACAS-NEXT: lr.w a4, (a3) ; RV64IA-TSO-ZACAS-NEXT: mv a5, a1 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB68_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_acq_rel: @@ -16409,44 +16409,44 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_seq_cst: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a3) ; RV32I-ZALRSC-NEXT: mv a5, a1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB69_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: 
atomicrmw_xchg_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: -; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-NOZACAS-NEXT: lui a2, 16 +; RV32IA-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-NOZACAS-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NOZACAS-NEXT: lr.w.aqrl a3, (a2) +; RV32IA-NOZACAS-NEXT: lr.w.aqrl a4, (a3) ; RV32IA-NOZACAS-NEXT: mv a5, a1 -; RV32IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-NOZACAS-NEXT: bnez a5, .LBB69_1 ; RV32IA-NOZACAS-NEXT: # %bb.2: -; RV32IA-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_i16_seq_cst: @@ -16461,86 +16461,86 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_seq_cst: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: 
lr.w.aqrl a4, (a3) ; RV64I-ZALRSC-NEXT: mv a5, a1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB69_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: -; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-NOZACAS-NEXT: lui a2, 16 +; RV64IA-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-NOZACAS-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NOZACAS-NEXT: lr.w.aqrl a3, (a2) +; RV64IA-NOZACAS-NEXT: lr.w.aqrl a4, (a3) ; RV64IA-NOZACAS-NEXT: mv a5, a1 -; RV64IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-NOZACAS-NEXT: bnez a5, .LBB69_1 ; RV64IA-NOZACAS-NEXT: # %bb.2: -; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_xchg_i16_seq_cst: ; RV32IA-ZACAS: # %bb.0: -; RV32IA-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-ZACAS-NEXT: lui a2, 16 +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: addi a2, a2, -1 ; 
RV32IA-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-ZACAS-NEXT: lui a3, 16 -; RV32IA-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-ZACAS-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-ZACAS-NEXT: lr.w.aqrl a3, (a2) +; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a3) ; RV32IA-ZACAS-NEXT: mv a5, a1 -; RV32IA-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-ZACAS-NEXT: bnez a5, .LBB69_1 ; RV32IA-ZACAS-NEXT: # %bb.2: -; RV32IA-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_xchg_i16_seq_cst: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-ZACAS-NEXT: lui a2, 16 +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-ZACAS-NEXT: lui a3, 16 -; RV64IA-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-ZACAS-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w.aqrl a3, (a2) +; RV64IA-ZACAS-NEXT: lr.w.aqrl a4, (a3) ; RV64IA-ZACAS-NEXT: mv a5, a1 -; RV64IA-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-ZACAS-NEXT: bnez a5, .LBB69_1 ; 
RV64IA-ZACAS-NEXT: # %bb.2: -; RV64IA-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_seq_cst: @@ -16583,31 +16583,31 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a1, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a2, 16 -; RV32I-ZALRSC-NEXT: addi a2, a2, -1 -; RV32I-ZALRSC-NEXT: sll a2, a2, a0 -; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: lui a1, 16 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 +; RV32I-ZALRSC-NEXT: addi a1, a1, -1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: not a1, a1 ; RV32I-ZALRSC-NEXT: .LBB70_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a1) -; RV32I-ZALRSC-NEXT: and a4, a3, a2 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB70_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_0_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: -; RV32IA-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-NOZACAS-NEXT: lui a2, 16 -; RV32IA-NOZACAS-NEXT: addi a2, a2, -1 -; RV32IA-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-NOZACAS-NEXT: not a2, a2 -; RV32IA-NOZACAS-NEXT: amoand.w a1, a2, (a1) -; RV32IA-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-NOZACAS-NEXT: lui a1, 16 +; RV32IA-NOZACAS-NEXT: slli a2, a0, 3 +; RV32IA-NOZACAS-NEXT: addi a1, a1, -1 +; RV32IA-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-NOZACAS-NEXT: not a1, a1 +; RV32IA-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-NOZACAS-NEXT: ret ; ; 
RV64I-LABEL: atomicrmw_xchg_0_i16_monotonic: @@ -16623,55 +16623,55 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a1, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a2, 16 -; RV64I-ZALRSC-NEXT: addi a2, a2, -1 -; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 -; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: lui a1, 16 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 +; RV64I-ZALRSC-NEXT: addi a1, a1, -1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: not a1, a1 ; RV64I-ZALRSC-NEXT: .LBB70_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a1) -; RV64I-ZALRSC-NEXT: and a4, a3, a2 -; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB70_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_0_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: -; RV64IA-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-NOZACAS-NEXT: lui a2, 16 -; RV64IA-NOZACAS-NEXT: addi a2, a2, -1 -; RV64IA-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-NOZACAS-NEXT: not a2, a2 -; RV64IA-NOZACAS-NEXT: amoand.w a1, a2, (a1) -; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-NOZACAS-NEXT: lui a1, 16 +; RV64IA-NOZACAS-NEXT: slli a2, a0, 3 +; RV64IA-NOZACAS-NEXT: addi a1, a1, -1 +; RV64IA-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-NOZACAS-NEXT: not a1, a1 +; RV64IA-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_xchg_0_i16_monotonic: ; RV32IA-ZACAS: # %bb.0: -; RV32IA-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-ZACAS-NEXT: slli 
a0, a0, 3 -; RV32IA-ZACAS-NEXT: lui a2, 16 -; RV32IA-ZACAS-NEXT: addi a2, a2, -1 -; RV32IA-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-ZACAS-NEXT: not a2, a2 -; RV32IA-ZACAS-NEXT: amoand.w a1, a2, (a1) -; RV32IA-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-ZACAS-NEXT: lui a1, 16 +; RV32IA-ZACAS-NEXT: slli a2, a0, 3 +; RV32IA-ZACAS-NEXT: addi a1, a1, -1 +; RV32IA-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-ZACAS-NEXT: not a1, a1 +; RV32IA-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_xchg_0_i16_monotonic: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-ZACAS-NEXT: lui a2, 16 -; RV64IA-ZACAS-NEXT: addi a2, a2, -1 -; RV64IA-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-ZACAS-NEXT: not a2, a2 -; RV64IA-ZACAS-NEXT: amoand.w a1, a2, (a1) -; RV64IA-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-ZACAS-NEXT: lui a1, 16 +; RV64IA-ZACAS-NEXT: slli a2, a0, 3 +; RV64IA-ZACAS-NEXT: addi a1, a1, -1 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-ZACAS-NEXT: not a1, a1 +; RV64IA-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_monotonic: @@ -16711,43 +16711,43 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_acquire: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a1, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a2, 16 -; RV32I-ZALRSC-NEXT: addi a2, a2, -1 -; RV32I-ZALRSC-NEXT: sll a2, a2, a0 -; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: lui a1, 16 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 +; RV32I-ZALRSC-NEXT: addi a1, a1, -1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: not a1, a1 ; RV32I-ZALRSC-NEXT: .LBB71_1: # =>This Inner Loop Header: Depth=1 -; 
RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) -; RV32I-ZALRSC-NEXT: and a4, a3, a2 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a0) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB71_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-WMO-NOZACAS-NEXT: not a2, a2 -; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aq a1, a2, (a1) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: lui a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: addi a1, a1, -1 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: not a1, a1 +; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aq a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acquire: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-TSO-NOZACAS-NEXT: not a2, a2 -; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: lui a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: addi a1, a1, -1 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: not a1, a1 +; RV32IA-TSO-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; 
RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_0_i16_acquire: @@ -16763,91 +16763,91 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_acquire: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a1, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a2, 16 -; RV64I-ZALRSC-NEXT: addi a2, a2, -1 -; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 -; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: lui a1, 16 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 +; RV64I-ZALRSC-NEXT: addi a1, a1, -1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: not a1, a1 ; RV64I-ZALRSC-NEXT: .LBB71_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) -; RV64I-ZALRSC-NEXT: and a4, a3, a2 -; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a0) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB71_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-WMO-NOZACAS-NEXT: not a2, a2 -; RV64IA-WMO-NOZACAS-NEXT: amoand.w.aq a1, a2, (a1) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: lui a1, 16 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: addi a1, a1, -1 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: not a1, a1 +; RV64IA-WMO-NOZACAS-NEXT: amoand.w.aq a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: 
atomicrmw_xchg_0_i16_acquire: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-TSO-NOZACAS-NEXT: not a2, a2 -; RV64IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: lui a1, 16 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: addi a1, a1, -1 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: not a1, a1 +; RV64IA-TSO-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acquire: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-WMO-ZACAS-NEXT: not a2, a2 -; RV32IA-WMO-ZACAS-NEXT: amoand.w.aq a1, a2, (a1) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: lui a1, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: addi a1, a1, -1 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: not a1, a1 +; RV32IA-WMO-ZACAS-NEXT: amoand.w.aq a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-TSO-ZACAS-NEXT: not a2, a2 -; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; 
RV32IA-TSO-ZACAS-NEXT: lui a1, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: addi a1, a1, -1 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: not a1, a1 +; RV32IA-TSO-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acquire: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-WMO-ZACAS-NEXT: not a2, a2 -; RV64IA-WMO-ZACAS-NEXT: amoand.w.aq a1, a2, (a1) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: lui a1, 16 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: addi a1, a1, -1 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: not a1, a1 +; RV64IA-WMO-ZACAS-NEXT: amoand.w.aq a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-TSO-ZACAS-NEXT: not a2, a2 -; RV64IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: lui a1, 16 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: addi a1, a1, -1 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: not a1, a1 +; RV64IA-TSO-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_acquire: @@ -16887,43 +16887,43 
@@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_release: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a1, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a2, 16 -; RV32I-ZALRSC-NEXT: addi a2, a2, -1 -; RV32I-ZALRSC-NEXT: sll a2, a2, a0 -; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: lui a1, 16 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 +; RV32I-ZALRSC-NEXT: addi a1, a1, -1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: not a1, a1 ; RV32I-ZALRSC-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a1) -; RV32I-ZALRSC-NEXT: and a4, a3, a2 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB72_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-WMO-NOZACAS-NEXT: not a2, a2 -; RV32IA-WMO-NOZACAS-NEXT: amoand.w.rl a1, a2, (a1) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: lui a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: addi a1, a1, -1 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: not a1, a1 +; RV32IA-WMO-NOZACAS-NEXT: amoand.w.rl a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_release: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; 
RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-TSO-NOZACAS-NEXT: not a2, a2 -; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: lui a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: addi a1, a1, -1 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: not a1, a1 +; RV32IA-TSO-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_0_i16_release: @@ -16939,91 +16939,91 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_release: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a1, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a2, 16 -; RV64I-ZALRSC-NEXT: addi a2, a2, -1 -; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 -; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: lui a1, 16 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 +; RV64I-ZALRSC-NEXT: addi a1, a1, -1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: not a1, a1 ; RV64I-ZALRSC-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a1) -; RV64I-ZALRSC-NEXT: and a4, a3, a2 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB72_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui 
a2, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-WMO-NOZACAS-NEXT: not a2, a2 -; RV64IA-WMO-NOZACAS-NEXT: amoand.w.rl a1, a2, (a1) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: lui a1, 16 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: addi a1, a1, -1 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: not a1, a1 +; RV64IA-WMO-NOZACAS-NEXT: amoand.w.rl a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_release: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-TSO-NOZACAS-NEXT: not a2, a2 -; RV64IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: lui a1, 16 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: addi a1, a1, -1 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: not a1, a1 +; RV64IA-TSO-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_release: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-WMO-ZACAS-NEXT: not a2, a2 -; RV32IA-WMO-ZACAS-NEXT: amoand.w.rl a1, a2, (a1) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: lui a1, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: addi a1, a1, -1 +; RV32IA-WMO-ZACAS-NEXT: sll 
a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: not a1, a1 +; RV32IA-WMO-ZACAS-NEXT: amoand.w.rl a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i16_release: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-TSO-ZACAS-NEXT: not a2, a2 -; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: lui a1, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: addi a1, a1, -1 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: not a1, a1 +; RV32IA-TSO-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_release: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-WMO-ZACAS-NEXT: not a2, a2 -; RV64IA-WMO-ZACAS-NEXT: amoand.w.rl a1, a2, (a1) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: lui a1, 16 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: addi a1, a1, -1 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: not a1, a1 +; RV64IA-WMO-ZACAS-NEXT: amoand.w.rl a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i16_release: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 -; 
RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-TSO-ZACAS-NEXT: not a2, a2 -; RV64IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: lui a1, 16 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: addi a1, a1, -1 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: not a1, a1 +; RV64IA-TSO-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_release: @@ -17063,43 +17063,43 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_acq_rel: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a1, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a2, 16 -; RV32I-ZALRSC-NEXT: addi a2, a2, -1 -; RV32I-ZALRSC-NEXT: sll a2, a2, a0 -; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: lui a1, 16 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 +; RV32I-ZALRSC-NEXT: addi a1, a1, -1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: not a1, a1 ; RV32I-ZALRSC-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) -; RV32I-ZALRSC-NEXT: and a4, a3, a2 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a0) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB73_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 
-; RV32IA-WMO-NOZACAS-NEXT: not a2, a2 -; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a2, (a1) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: lui a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: addi a1, a1, -1 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: not a1, a1 +; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-TSO-NOZACAS-NEXT: not a2, a2 -; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: lui a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: addi a1, a1, -1 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: not a1, a1 +; RV32IA-TSO-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_0_i16_acq_rel: @@ -17115,91 +17115,91 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_acq_rel: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a1, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a2, 16 -; RV64I-ZALRSC-NEXT: addi a2, a2, -1 -; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 -; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: lui a1, 16 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 +; RV64I-ZALRSC-NEXT: addi a1, a1, -1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: not a1, a1 ; RV64I-ZALRSC-NEXT: 
.LBB73_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) -; RV64I-ZALRSC-NEXT: and a4, a3, a2 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a0) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB73_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-WMO-NOZACAS-NEXT: not a2, a2 -; RV64IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a2, (a1) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: lui a1, 16 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: addi a1, a1, -1 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: not a1, a1 +; RV64IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-TSO-NOZACAS-NEXT: not a2, a2 -; RV64IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: lui a1, 16 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: addi a1, a1, -1 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: not a1, a1 +; RV64IA-TSO-NOZACAS-NEXT: 
amoand.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-WMO-ZACAS-NEXT: not a2, a2 -; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a2, (a1) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: lui a1, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: addi a1, a1, -1 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: not a1, a1 +; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-TSO-ZACAS-NEXT: not a2, a2 -; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: lui a1, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: addi a1, a1, -1 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: not a1, a1 +; RV32IA-TSO-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-WMO-ZACAS-NEXT: not a2, a2 -; 
RV64IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a2, (a1) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: lui a1, 16 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: addi a1, a1, -1 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: not a1, a1 +; RV64IA-WMO-ZACAS-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-TSO-ZACAS-NEXT: not a2, a2 -; RV64IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: lui a1, 16 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: addi a1, a1, -1 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: not a1, a1 +; RV64IA-TSO-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_acq_rel: @@ -17239,43 +17239,43 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_seq_cst: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a1, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a2, 16 -; RV32I-ZALRSC-NEXT: addi a2, a2, -1 -; RV32I-ZALRSC-NEXT: sll a2, a2, a0 -; RV32I-ZALRSC-NEXT: not a2, a2 +; RV32I-ZALRSC-NEXT: lui a1, 16 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 +; RV32I-ZALRSC-NEXT: addi a1, a1, -1 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: not a1, a1 ; RV32I-ZALRSC-NEXT: .LBB74_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) 
-; RV32I-ZALRSC-NEXT: and a4, a3, a2 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a0) +; RV32I-ZALRSC-NEXT: and a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB74_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-WMO-NOZACAS-NEXT: not a2, a2 -; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a2, (a1) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: lui a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: addi a1, a1, -1 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: not a1, a1 +; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-TSO-NOZACAS-NEXT: not a2, a2 -; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: lui a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: addi a1, a1, -1 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: not a1, a1 +; RV32IA-TSO-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: 
ret ; ; RV64I-LABEL: atomicrmw_xchg_0_i16_seq_cst: @@ -17291,91 +17291,91 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_0_i16_seq_cst: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a1, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a2, 16 -; RV64I-ZALRSC-NEXT: addi a2, a2, -1 -; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 -; RV64I-ZALRSC-NEXT: not a2, a2 +; RV64I-ZALRSC-NEXT: lui a1, 16 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 +; RV64I-ZALRSC-NEXT: addi a1, a1, -1 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: not a1, a1 ; RV64I-ZALRSC-NEXT: .LBB74_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) -; RV64I-ZALRSC-NEXT: and a4, a3, a2 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a0) +; RV64I-ZALRSC-NEXT: and a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB74_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-WMO-NOZACAS-NEXT: not a2, a2 -; RV64IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a2, (a1) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: lui a1, 16 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: addi a1, a1, -1 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: not a1, a1 +; RV64IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: 
atomicrmw_xchg_0_i16_seq_cst: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-TSO-NOZACAS-NEXT: not a2, a2 -; RV64IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: lui a1, 16 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: addi a1, a1, -1 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: not a1, a1 +; RV64IA-TSO-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-WMO-ZACAS-NEXT: not a2, a2 -; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a2, (a1) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: lui a1, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: addi a1, a1, -1 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: not a1, a1 +; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-TSO-ZACAS-NEXT: not a2, a2 -; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; 
RV32IA-TSO-ZACAS-NEXT: lui a1, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: addi a1, a1, -1 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: not a1, a1 +; RV32IA-TSO-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-WMO-ZACAS-NEXT: not a2, a2 -; RV64IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a2, (a1) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: lui a1, 16 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: addi a1, a1, -1 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: not a1, a1 +; RV64IA-WMO-ZACAS-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-TSO-ZACAS-NEXT: not a2, a2 -; RV64IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: lui a1, 16 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: addi a1, a1, -1 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: not a1, a1 +; RV64IA-TSO-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_seq_cst: @@ -17416,29 
+17416,29 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a1, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a2, 16 -; RV32I-ZALRSC-NEXT: addi a2, a2, -1 -; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: lui a1, 16 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 +; RV32I-ZALRSC-NEXT: addi a1, a1, -1 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB75_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a1) -; RV32I-ZALRSC-NEXT: or a4, a3, a2 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB75_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: -; RV32IA-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-NOZACAS-NEXT: lui a2, 16 -; RV32IA-NOZACAS-NEXT: addi a2, a2, -1 -; RV32IA-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-NOZACAS-NEXT: amoor.w a1, a2, (a1) -; RV32IA-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-NOZACAS-NEXT: lui a1, 16 +; RV32IA-NOZACAS-NEXT: slli a2, a0, 3 +; RV32IA-NOZACAS-NEXT: addi a1, a1, -1 +; RV32IA-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_minus_1_i16_monotonic: @@ -17455,51 +17455,51 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a1, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: 
lui a2, 16 -; RV64I-ZALRSC-NEXT: addi a2, a2, -1 -; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: lui a1, 16 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 +; RV64I-ZALRSC-NEXT: addi a1, a1, -1 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB75_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a1) -; RV64I-ZALRSC-NEXT: or a4, a3, a2 -; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB75_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: -; RV64IA-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-NOZACAS-NEXT: lui a2, 16 -; RV64IA-NOZACAS-NEXT: addi a2, a2, -1 -; RV64IA-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-NOZACAS-NEXT: amoor.w a1, a2, (a1) -; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-NOZACAS-NEXT: lui a1, 16 +; RV64IA-NOZACAS-NEXT: slli a2, a0, 3 +; RV64IA-NOZACAS-NEXT: addi a1, a1, -1 +; RV64IA-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_monotonic: ; RV32IA-ZACAS: # %bb.0: -; RV32IA-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-ZACAS-NEXT: lui a2, 16 -; RV32IA-ZACAS-NEXT: addi a2, a2, -1 -; RV32IA-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-ZACAS-NEXT: amoor.w a1, a2, (a1) -; RV32IA-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-ZACAS-NEXT: lui a1, 16 +; RV32IA-ZACAS-NEXT: slli a2, a0, 3 +; RV32IA-ZACAS-NEXT: addi a1, a1, -1 +; RV32IA-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-ZACAS-NEXT: amoor.w a0, a1, (a0) +; 
RV32IA-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_monotonic: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-ZACAS-NEXT: lui a2, 16 -; RV64IA-ZACAS-NEXT: addi a2, a2, -1 -; RV64IA-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-ZACAS-NEXT: amoor.w a1, a2, (a1) -; RV64IA-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-ZACAS-NEXT: lui a1, 16 +; RV64IA-ZACAS-NEXT: slli a2, a0, 3 +; RV64IA-ZACAS-NEXT: addi a1, a1, -1 +; RV64IA-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_monotonic: @@ -17544,40 +17544,40 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_acquire: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a1, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a2, 16 -; RV32I-ZALRSC-NEXT: addi a2, a2, -1 -; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: lui a1, 16 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 +; RV32I-ZALRSC-NEXT: addi a1, a1, -1 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) -; RV32I-ZALRSC-NEXT: or a4, a3, a2 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a0) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB76_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 
-; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aq a1, a2, (a1) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: lui a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: addi a1, a1, -1 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aq a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: lui a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: addi a1, a1, -1 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_minus_1_i16_acquire: @@ -17594,84 +17594,84 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_acquire: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a1, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a2, 16 -; RV64I-ZALRSC-NEXT: addi a2, a2, -1 -; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: lui a1, 16 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 +; RV64I-ZALRSC-NEXT: addi a1, a1, -1 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) -; RV64I-ZALRSC-NEXT: or a4, a3, a2 
-; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a1) +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a0) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB76_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-WMO-NOZACAS-NEXT: amoor.w.aq a1, a2, (a1) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: lui a1, 16 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: addi a1, a1, -1 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: amoor.w.aq a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: lui a1, 16 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: addi a1, a1, -1 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; 
RV32IA-WMO-ZACAS-NEXT: lui a2, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-WMO-ZACAS-NEXT: amoor.w.aq a1, a2, (a1) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: lui a1, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: addi a1, a1, -1 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: amoor.w.aq a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: lui a1, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: addi a1, a1, -1 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-WMO-ZACAS-NEXT: amoor.w.aq a1, a2, (a1) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: lui a1, 16 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: addi a1, a1, -1 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: amoor.w.aq a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: 
atomicrmw_xchg_minus_1_i16_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: lui a1, 16 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: addi a1, a1, -1 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_acquire: @@ -17716,40 +17716,40 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_release: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a1, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a2, 16 -; RV32I-ZALRSC-NEXT: addi a2, a2, -1 -; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: lui a1, 16 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 +; RV32I-ZALRSC-NEXT: addi a1, a1, -1 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a1) -; RV32I-ZALRSC-NEXT: or a4, a3, a2 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB77_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 -; 
RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-WMO-NOZACAS-NEXT: amoor.w.rl a1, a2, (a1) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: lui a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: addi a1, a1, -1 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: amoor.w.rl a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: lui a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: addi a1, a1, -1 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_minus_1_i16_release: @@ -17766,84 +17766,84 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_release: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a1, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a2, 16 -; RV64I-ZALRSC-NEXT: addi a2, a2, -1 -; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: lui a1, 16 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 +; RV64I-ZALRSC-NEXT: addi a1, a1, -1 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a1) -; RV64I-ZALRSC-NEXT: or a4, a3, a2 -; 
RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB77_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-WMO-NOZACAS-NEXT: amoor.w.rl a1, a2, (a1) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: lui a1, 16 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: addi a1, a1, -1 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: amoor.w.rl a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: lui a1, 16 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: addi a1, a1, -1 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; 
RV32IA-WMO-ZACAS-NEXT: lui a2, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-WMO-ZACAS-NEXT: amoor.w.rl a1, a2, (a1) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: lui a1, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: addi a1, a1, -1 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: amoor.w.rl a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: lui a1, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: addi a1, a1, -1 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-WMO-ZACAS-NEXT: amoor.w.rl a1, a2, (a1) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: lui a1, 16 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: addi a1, a1, -1 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: amoor.w.rl a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: 
atomicrmw_xchg_minus_1_i16_release: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: lui a1, 16 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: addi a1, a1, -1 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_release: @@ -17888,40 +17888,40 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a1, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a2, 16 -; RV32I-ZALRSC-NEXT: addi a2, a2, -1 -; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: lui a1, 16 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 +; RV32I-ZALRSC-NEXT: addi a1, a1, -1 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB78_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a1) -; RV32I-ZALRSC-NEXT: or a4, a3, a2 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a0) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB78_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 -; 
RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a2, (a1) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: lui a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: addi a1, a1, -1 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: lui a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: addi a1, a1, -1 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: @@ -17938,84 +17938,84 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a1, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a2, 16 -; RV64I-ZALRSC-NEXT: addi a2, a2, -1 -; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: lui a1, 16 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 +; RV64I-ZALRSC-NEXT: addi a1, a1, -1 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB78_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a1) -; RV64I-ZALRSC-NEXT: or a4, a3, a2 
-; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a0) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB78_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a2, (a1) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: lui a1, 16 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: addi a1, a1, -1 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: lui a1, 16 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: addi a1, a1, -1 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 
3 -; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a2, (a1) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: lui a1, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: addi a1, a1, -1 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: lui a1, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: addi a1, a1, -1 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a2, (a1) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: lui a1, 16 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: addi a1, a1, -1 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: 
atomicrmw_xchg_minus_1_i16_acq_rel: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: lui a1, 16 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: addi a1, a1, -1 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: @@ -18060,40 +18060,40 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a1, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a2, 16 -; RV32I-ZALRSC-NEXT: addi a2, a2, -1 -; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: lui a1, 16 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 +; RV32I-ZALRSC-NEXT: addi a1, a1, -1 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB79_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) -; RV32I-ZALRSC-NEXT: or a4, a3, a2 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a0) +; RV32I-ZALRSC-NEXT: or a4, a3, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB79_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 -; 
RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a2, (a1) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: lui a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: addi a1, a1, -1 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 -; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: lui a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: addi a1, a1, -1 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: @@ -18110,84 +18110,84 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a1, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a2, 16 -; RV64I-ZALRSC-NEXT: addi a2, a2, -1 -; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: lui a1, 16 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 +; RV64I-ZALRSC-NEXT: addi a1, a1, -1 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB79_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a1) -; RV64I-ZALRSC-NEXT: or a4, a3, 
a2 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a1) +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a0) +; RV64I-ZALRSC-NEXT: or a4, a3, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB79_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a2, (a1) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: lui a1, 16 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: addi a1, a1, -1 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: lui a1, 16 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: addi a1, a1, -1 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, 
a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a2, (a1) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: lui a1, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: addi a1, a1, -1 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 -; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: lui a1, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: addi a1, a1, -1 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a2, (a1) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: lui a1, 16 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: addi a1, a1, -1 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: 
atomicrmw_xchg_minus_1_i16_seq_cst: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a1, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 -; RV64IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: lui a1, 16 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: addi a1, a1, -1 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: @@ -18230,44 +18230,44 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) -; RV32I-ZALRSC-NEXT: add a5, a3, a1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: lr.w a4, (a3) +; RV32I-ZALRSC-NEXT: add a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB80_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl 
a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: atomicrmw_add_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: -; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-NOZACAS-NEXT: lui a2, 16 +; RV32IA-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-NOZACAS-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-NOZACAS-NEXT: add a5, a3, a1 -; RV32IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-NOZACAS-NEXT: add a5, a4, a1 +; RV32IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-NOZACAS-NEXT: bnez a5, .LBB80_1 ; RV32IA-NOZACAS-NEXT: # %bb.2: -; RV32IA-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_add_i16_monotonic: @@ -18282,86 +18282,86 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB80_1: # =>This 
Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) -; RV64I-ZALRSC-NEXT: add a5, a3, a1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: lr.w a4, (a3) +; RV64I-ZALRSC-NEXT: add a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB80_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_add_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: -; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-NOZACAS-NEXT: lui a2, 16 +; RV64IA-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-NOZACAS-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-NOZACAS-NEXT: add a5, a3, a1 -; RV64IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-NOZACAS-NEXT: add a5, a4, a1 +; RV64IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-NOZACAS-NEXT: bnez a5, .LBB80_1 ; RV64IA-NOZACAS-NEXT: # %bb.2: -; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_add_i16_monotonic: ; RV32IA-ZACAS: # %bb.0: -; 
RV32IA-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-ZACAS-NEXT: lui a2, 16 +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-ZACAS-NEXT: lui a3, 16 -; RV32IA-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-ZACAS-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-ZACAS-NEXT: lr.w a3, (a2) -; RV32IA-ZACAS-NEXT: add a5, a3, a1 -; RV32IA-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-ZACAS-NEXT: add a5, a4, a1 +; RV32IA-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-ZACAS-NEXT: bnez a5, .LBB80_1 ; RV32IA-ZACAS-NEXT: # %bb.2: -; RV32IA-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_add_i16_monotonic: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-ZACAS-NEXT: lui a2, 16 +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-ZACAS-NEXT: lui a3, 16 -; RV64IA-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-ZACAS-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w a3, (a2) -; RV64IA-ZACAS-NEXT: add a5, a3, a1 -; RV64IA-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-ZACAS-NEXT: lr.w a4, (a3) +; RV64IA-ZACAS-NEXT: add a5, a4, 
a1 +; RV64IA-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-ZACAS-NEXT: bnez a5, .LBB80_1 ; RV64IA-ZACAS-NEXT: # %bb.2: -; RV64IA-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_monotonic: @@ -18400,65 +18400,65 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_acquire: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) -; RV32I-ZALRSC-NEXT: add a5, a3, a1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a3) +; RV32I-ZALRSC-NEXT: add a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB81_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; 
RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-NOZACAS-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2) -; RV32IA-WMO-NOZACAS-NEXT: add a5, a3, a1 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a3) +; RV32IA-WMO-NOZACAS-NEXT: add a5, a4, a1 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB81_1 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_add_i16_acquire: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-NOZACAS-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-NOZACAS-NEXT: add a5, a3, a1 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; 
RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-NOZACAS-NEXT: add a5, a4, a1 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB81_1 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_add_i16_acquire: @@ -18473,149 +18473,149 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_acquire: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) -; RV64I-ZALRSC-NEXT: add a5, a3, a1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a3) +; RV64I-ZALRSC-NEXT: add a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB81_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; 
RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NOZACAS-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2) -; RV64IA-WMO-NOZACAS-NEXT: add a5, a3, a1 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a3) +; RV64IA-WMO-NOZACAS-NEXT: add a5, a4, a1 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-WMO-NOZACAS-NEXT: bnez a5, .LBB81_1 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.2: -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_add_i16_acquire: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NOZACAS-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; 
RV64IA-TSO-NOZACAS-NEXT: add a5, a3, a1 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-NOZACAS-NEXT: add a5, a4, a1 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-NOZACAS-NEXT: bnez a5, .LBB81_1 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.2: -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_add_i16_acquire: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZACAS-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2) -; RV32IA-WMO-ZACAS-NEXT: add a5, a3, a1 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: add a5, a4, a1 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB81_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: -; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0 ; 
RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_add_i16_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZACAS-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-ZACAS-NEXT: add a5, a3, a1 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-ZACAS-NEXT: add a5, a4, a1 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB81_1 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: -; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_add_i16_acquire: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZACAS-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 -; 
RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2) -; RV64IA-WMO-ZACAS-NEXT: add a5, a3, a1 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: add a5, a4, a1 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB81_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_add_i16_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZACAS-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-ZACAS-NEXT: add a5, a3, a1 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-ZACAS-NEXT: add a5, a4, a1 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB81_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0 ; 
RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_acquire: @@ -18654,65 +18654,65 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_release: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) -; RV32I-ZALRSC-NEXT: add a5, a3, a1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: lr.w a4, (a3) +; RV32I-ZALRSC-NEXT: add a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB82_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 ; 
RV32IA-WMO-NOZACAS-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-WMO-NOZACAS-NEXT: add a5, a3, a1 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-WMO-NOZACAS-NEXT: add a5, a4, a1 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB82_1 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_add_i16_release: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-NOZACAS-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-NOZACAS-NEXT: add a5, a3, a1 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-NOZACAS-NEXT: add a5, a4, a1 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; 
RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB82_1 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_add_i16_release: @@ -18727,149 +18727,149 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_release: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) -; RV64I-ZALRSC-NEXT: add a5, a3, a1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: lr.w a4, (a3) +; RV64I-ZALRSC-NEXT: add a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB82_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-NOZACAS-NEXT: 
and a1, a1, a3 +; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NOZACAS-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-WMO-NOZACAS-NEXT: add a5, a3, a1 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-WMO-NOZACAS-NEXT: add a5, a4, a1 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-NOZACAS-NEXT: bnez a5, .LBB82_1 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.2: -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_add_i16_release: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NOZACAS-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-NOZACAS-NEXT: add a5, a3, a1 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-NOZACAS-NEXT: add a5, a4, a1 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, 
a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-NOZACAS-NEXT: bnez a5, .LBB82_1 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.2: -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_add_i16_release: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZACAS-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w a3, (a2) -; RV32IA-WMO-ZACAS-NEXT: add a5, a3, a1 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: add a5, a4, a1 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB82_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: -; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_add_i16_release: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; 
RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZACAS-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-ZACAS-NEXT: add a5, a3, a1 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-ZACAS-NEXT: add a5, a4, a1 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB82_1 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: -; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_add_i16_release: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZACAS-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w a3, (a2) -; RV64IA-WMO-ZACAS-NEXT: add a5, a3, a1 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-ZACAS-NEXT: lr.w a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: add a5, a4, a1 +; RV64IA-WMO-ZACAS-NEXT: 
xor a5, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB82_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_add_i16_release: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZACAS-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-ZACAS-NEXT: add a5, a3, a1 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-ZACAS-NEXT: add a5, a4, a1 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB82_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_release: @@ -18908,65 +18908,65 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_acq_rel: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; 
RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) -; RV32I-ZALRSC-NEXT: add a5, a3, a1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a3) +; RV32I-ZALRSC-NEXT: add a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB83_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-NOZACAS-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2) -; RV32IA-WMO-NOZACAS-NEXT: add a5, a3, a1 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, 
(a3) +; RV32IA-WMO-NOZACAS-NEXT: add a5, a4, a1 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB83_1 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_add_i16_acq_rel: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-NOZACAS-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-NOZACAS-NEXT: add a5, a3, a1 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-NOZACAS-NEXT: add a5, a4, a1 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB83_1 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_add_i16_acq_rel: @@ -18981,149 +18981,149 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: 
atomicrmw_add_i16_acq_rel: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) -; RV64I-ZALRSC-NEXT: add a5, a3, a1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a3) +; RV64I-ZALRSC-NEXT: add a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB83_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NOZACAS-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2) -; RV64IA-WMO-NOZACAS-NEXT: add a5, a3, a1 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; 
RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a3) +; RV64IA-WMO-NOZACAS-NEXT: add a5, a4, a1 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-NOZACAS-NEXT: bnez a5, .LBB83_1 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.2: -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_add_i16_acq_rel: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NOZACAS-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-NOZACAS-NEXT: add a5, a3, a1 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-NOZACAS-NEXT: add a5, a4, a1 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-NOZACAS-NEXT: bnez a5, .LBB83_1 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.2: -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; 
RV32IA-WMO-ZACAS-LABEL: atomicrmw_add_i16_acq_rel: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZACAS-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2) -; RV32IA-WMO-ZACAS-NEXT: add a5, a3, a1 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: add a5, a4, a1 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB83_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: -; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_add_i16_acq_rel: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZACAS-NEXT: .LBB83_1: # =>This Inner Loop 
Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-ZACAS-NEXT: add a5, a3, a1 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-ZACAS-NEXT: add a5, a4, a1 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB83_1 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: -; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_add_i16_acq_rel: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZACAS-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2) -; RV64IA-WMO-ZACAS-NEXT: add a5, a3, a1 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: add a5, a4, a1 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB83_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: 
srlw a0, a4, a0 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_add_i16_acq_rel: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZACAS-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-ZACAS-NEXT: add a5, a3, a1 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-ZACAS-NEXT: add a5, a4, a1 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB83_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_acq_rel: @@ -19162,44 +19162,44 @@ define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_seq_cst: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 
; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) -; RV32I-ZALRSC-NEXT: add a5, a3, a1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a3) +; RV32I-ZALRSC-NEXT: add a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB84_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: atomicrmw_add_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: -; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-NOZACAS-NEXT: lui a2, 16 +; RV32IA-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-NOZACAS-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NOZACAS-NEXT: lr.w.aqrl a3, (a2) -; RV32IA-NOZACAS-NEXT: add a5, a3, a1 -; RV32IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-NOZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV32IA-NOZACAS-NEXT: add a5, a4, a1 +; RV32IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-NOZACAS-NEXT: bnez a5, .LBB84_1 ; RV32IA-NOZACAS-NEXT: # %bb.2: -; RV32IA-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-NOZACAS-NEXT: srl a0, a4, a0 ; 
RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_add_i16_seq_cst: @@ -19214,86 +19214,86 @@ define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_seq_cst: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) -; RV64I-ZALRSC-NEXT: add a5, a3, a1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a3) +; RV64I-ZALRSC-NEXT: add a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB84_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_add_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: -; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-NOZACAS-NEXT: lui a2, 16 +; RV64IA-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-NOZACAS-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1 -; 
RV64IA-NOZACAS-NEXT: lr.w.aqrl a3, (a2) -; RV64IA-NOZACAS-NEXT: add a5, a3, a1 -; RV64IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-NOZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV64IA-NOZACAS-NEXT: add a5, a4, a1 +; RV64IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-NOZACAS-NEXT: bnez a5, .LBB84_1 ; RV64IA-NOZACAS-NEXT: # %bb.2: -; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_add_i16_seq_cst: ; RV32IA-ZACAS: # %bb.0: -; RV32IA-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-ZACAS-NEXT: lui a2, 16 +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-ZACAS-NEXT: lui a3, 16 -; RV32IA-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-ZACAS-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-ZACAS-NEXT: lr.w.aqrl a3, (a2) -; RV32IA-ZACAS-NEXT: add a5, a3, a1 -; RV32IA-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV32IA-ZACAS-NEXT: add a5, a4, a1 +; RV32IA-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-ZACAS-NEXT: bnez a5, .LBB84_1 ; RV32IA-ZACAS-NEXT: # %bb.2: -; RV32IA-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_add_i16_seq_cst: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a2, a0, -4 +; 
RV64IA-ZACAS-NEXT: lui a2, 16 +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-ZACAS-NEXT: lui a3, 16 -; RV64IA-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-ZACAS-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w.aqrl a3, (a2) -; RV64IA-ZACAS-NEXT: add a5, a3, a1 -; RV64IA-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-ZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV64IA-ZACAS-NEXT: add a5, a4, a1 +; RV64IA-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-ZACAS-NEXT: bnez a5, .LBB84_1 ; RV64IA-ZACAS-NEXT: # %bb.2: -; RV64IA-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_seq_cst: @@ -19332,44 +19332,44 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) -; RV32I-ZALRSC-NEXT: sub a5, a3, a1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; 
RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: lr.w a4, (a3) +; RV32I-ZALRSC-NEXT: sub a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB85_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: -; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-NOZACAS-NEXT: lui a2, 16 +; RV32IA-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-NOZACAS-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-NOZACAS-NEXT: sub a5, a3, a1 -; RV32IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-NOZACAS-NEXT: sub a5, a4, a1 +; RV32IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-NOZACAS-NEXT: bnez a5, .LBB85_1 ; RV32IA-NOZACAS-NEXT: # %bb.2: -; RV32IA-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_sub_i16_monotonic: @@ -19384,86 +19384,86 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; 
RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) -; RV64I-ZALRSC-NEXT: sub a5, a3, a1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: lr.w a4, (a3) +; RV64I-ZALRSC-NEXT: sub a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB85_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_sub_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: -; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-NOZACAS-NEXT: lui a2, 16 +; RV64IA-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-NOZACAS-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-NOZACAS-NEXT: sub a5, a3, a1 -; RV64IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-NOZACAS-NEXT: 
sub a5, a4, a1 +; RV64IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-NOZACAS-NEXT: bnez a5, .LBB85_1 ; RV64IA-NOZACAS-NEXT: # %bb.2: -; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_sub_i16_monotonic: ; RV32IA-ZACAS: # %bb.0: -; RV32IA-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-ZACAS-NEXT: lui a2, 16 +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-ZACAS-NEXT: lui a3, 16 -; RV32IA-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-ZACAS-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-ZACAS-NEXT: lr.w a3, (a2) -; RV32IA-ZACAS-NEXT: sub a5, a3, a1 -; RV32IA-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-ZACAS-NEXT: sub a5, a4, a1 +; RV32IA-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-ZACAS-NEXT: bnez a5, .LBB85_1 ; RV32IA-ZACAS-NEXT: # %bb.2: -; RV32IA-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_sub_i16_monotonic: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-ZACAS-NEXT: lui a2, 16 +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-ZACAS-NEXT: lui a3, 16 -; RV64IA-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-ZACAS-NEXT: and a1, a1, a2 +; 
RV64IA-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-ZACAS-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w a3, (a2) -; RV64IA-ZACAS-NEXT: sub a5, a3, a1 -; RV64IA-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-ZACAS-NEXT: lr.w a4, (a3) +; RV64IA-ZACAS-NEXT: sub a5, a4, a1 +; RV64IA-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-ZACAS-NEXT: bnez a5, .LBB85_1 ; RV64IA-ZACAS-NEXT: # %bb.2: -; RV64IA-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_monotonic: @@ -19506,65 +19506,65 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_acquire: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) -; RV32I-ZALRSC-NEXT: sub a5, a3, a1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a3) +; RV32I-ZALRSC-NEXT: sub a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB86_1 ; RV32I-ZALRSC-NEXT: 
# %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-NOZACAS-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2) -; RV32IA-WMO-NOZACAS-NEXT: sub a5, a3, a1 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a3) +; RV32IA-WMO-NOZACAS-NEXT: sub a5, a4, a1 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB86_1 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_sub_i16_acquire: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; 
RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-NOZACAS-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-NOZACAS-NEXT: sub a5, a3, a1 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-NOZACAS-NEXT: sub a5, a4, a1 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB86_1 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_sub_i16_acquire: @@ -19579,149 +19579,149 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_acquire: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) -; RV64I-ZALRSC-NEXT: sub a5, a3, a1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a3) +; RV64I-ZALRSC-NEXT: sub a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: 
and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB86_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NOZACAS-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2) -; RV64IA-WMO-NOZACAS-NEXT: sub a5, a3, a1 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a3) +; RV64IA-WMO-NOZACAS-NEXT: sub a5, a4, a1 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-WMO-NOZACAS-NEXT: bnez a5, .LBB86_1 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.2: -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_sub_i16_acquire: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a3, 
16 -; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NOZACAS-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-NOZACAS-NEXT: sub a5, a3, a1 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-NOZACAS-NEXT: sub a5, a4, a1 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-NOZACAS-NEXT: bnez a5, .LBB86_1 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.2: -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_sub_i16_acquire: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZACAS-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2) -; RV32IA-WMO-ZACAS-NEXT: sub a5, a3, a1 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a3) +; 
RV32IA-WMO-ZACAS-NEXT: sub a5, a4, a1 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB86_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: -; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_sub_i16_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZACAS-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-ZACAS-NEXT: sub a5, a3, a1 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-ZACAS-NEXT: sub a5, a4, a1 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB86_1 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: -; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_sub_i16_acquire: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-ZACAS-NEXT: slli a0, 
a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZACAS-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2) -; RV64IA-WMO-ZACAS-NEXT: sub a5, a3, a1 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: sub a5, a4, a1 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB86_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_sub_i16_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZACAS-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-ZACAS-NEXT: sub a5, a3, a1 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; 
RV64IA-TSO-ZACAS-NEXT: sub a5, a4, a1 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB86_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_acquire: @@ -19764,65 +19764,65 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_release: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) -; RV32I-ZALRSC-NEXT: sub a5, a3, a1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: lr.w a4, (a3) +; RV32I-ZALRSC-NEXT: sub a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB87_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, 
-1 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-NOZACAS-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-WMO-NOZACAS-NEXT: sub a5, a3, a1 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-WMO-NOZACAS-NEXT: sub a5, a4, a1 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB87_1 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_sub_i16_release: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-NOZACAS-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-NOZACAS-NEXT: sub a5, a3, a1 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; 
RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-NOZACAS-NEXT: sub a5, a4, a1 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB87_1 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_sub_i16_release: @@ -19837,149 +19837,149 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_release: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) -; RV64I-ZALRSC-NEXT: sub a5, a3, a1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: lr.w a4, (a3) +; RV64I-ZALRSC-NEXT: sub a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB87_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: -; 
RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NOZACAS-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-WMO-NOZACAS-NEXT: sub a5, a3, a1 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-WMO-NOZACAS-NEXT: sub a5, a4, a1 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-NOZACAS-NEXT: bnez a5, .LBB87_1 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.2: -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_sub_i16_release: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NOZACAS-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1 -; 
RV64IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-NOZACAS-NEXT: sub a5, a3, a1 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-NOZACAS-NEXT: sub a5, a4, a1 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-NOZACAS-NEXT: bnez a5, .LBB87_1 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.2: -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_sub_i16_release: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZACAS-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w a3, (a2) -; RV32IA-WMO-ZACAS-NEXT: sub a5, a3, a1 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: sub a5, a4, a1 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB87_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: -; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0 +; 
RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_sub_i16_release: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZACAS-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-ZACAS-NEXT: sub a5, a3, a1 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-ZACAS-NEXT: sub a5, a4, a1 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB87_1 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: -; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_sub_i16_release: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZACAS-NEXT: .LBB87_1: 
# =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w a3, (a2) -; RV64IA-WMO-ZACAS-NEXT: sub a5, a3, a1 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-ZACAS-NEXT: lr.w a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: sub a5, a4, a1 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB87_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_sub_i16_release: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZACAS-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-ZACAS-NEXT: sub a5, a3, a1 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-ZACAS-NEXT: sub a5, a4, a1 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB87_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0 +; 
RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_release: @@ -20022,65 +20022,65 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_acq_rel: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) -; RV32I-ZALRSC-NEXT: sub a5, a3, a1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a3) +; RV32I-ZALRSC-NEXT: sub a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB88_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 ; 
RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-NOZACAS-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2) -; RV32IA-WMO-NOZACAS-NEXT: sub a5, a3, a1 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a3) +; RV32IA-WMO-NOZACAS-NEXT: sub a5, a4, a1 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB88_1 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_sub_i16_acq_rel: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-NOZACAS-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-NOZACAS-NEXT: sub a5, a3, a1 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-NOZACAS-NEXT: sub a5, a4, a1 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; 
RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB88_1 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_sub_i16_acq_rel: @@ -20095,149 +20095,149 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_acq_rel: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) -; RV64I-ZALRSC-NEXT: sub a5, a3, a1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a3) +; RV64I-ZALRSC-NEXT: sub a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB88_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; 
RV64IA-WMO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NOZACAS-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2) -; RV64IA-WMO-NOZACAS-NEXT: sub a5, a3, a1 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a3) +; RV64IA-WMO-NOZACAS-NEXT: sub a5, a4, a1 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-NOZACAS-NEXT: bnez a5, .LBB88_1 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.2: -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_sub_i16_acq_rel: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NOZACAS-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-NOZACAS-NEXT: sub a5, a3, a1 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; 
RV64IA-TSO-NOZACAS-NEXT: sub a5, a4, a1 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-NOZACAS-NEXT: bnez a5, .LBB88_1 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.2: -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_sub_i16_acq_rel: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZACAS-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2) -; RV32IA-WMO-ZACAS-NEXT: sub a5, a3, a1 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: sub a5, a4, a1 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB88_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: -; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_sub_i16_acq_rel: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 
; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZACAS-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-ZACAS-NEXT: sub a5, a3, a1 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-ZACAS-NEXT: sub a5, a4, a1 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB88_1 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: -; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_sub_i16_acq_rel: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZACAS-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2) -; RV64IA-WMO-ZACAS-NEXT: sub a5, a3, a1 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; 
RV64IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: sub a5, a4, a1 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB88_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_sub_i16_acq_rel: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZACAS-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-ZACAS-NEXT: sub a5, a3, a1 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-ZACAS-NEXT: sub a5, a4, a1 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB88_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_acq_rel: @@ -20280,44 +20280,44 @@ define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_seq_cst: ; RV32I-ZALRSC: # %bb.0: -; 
RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) -; RV32I-ZALRSC-NEXT: sub a5, a3, a1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a3) +; RV32I-ZALRSC-NEXT: sub a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB89_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: -; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-NOZACAS-NEXT: lui a2, 16 +; RV32IA-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-NOZACAS-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NOZACAS-NEXT: lr.w.aqrl a3, (a2) -; RV32IA-NOZACAS-NEXT: sub a5, a3, a1 -; RV32IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; 
RV32IA-NOZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV32IA-NOZACAS-NEXT: sub a5, a4, a1 +; RV32IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-NOZACAS-NEXT: bnez a5, .LBB89_1 ; RV32IA-NOZACAS-NEXT: # %bb.2: -; RV32IA-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_sub_i16_seq_cst: @@ -20332,86 +20332,86 @@ define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_seq_cst: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) -; RV64I-ZALRSC-NEXT: sub a5, a3, a1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a3) +; RV64I-ZALRSC-NEXT: sub a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB89_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_sub_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: -; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-NOZACAS-NEXT: lui a2, 16 +; RV64IA-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-NOZACAS-NEXT: addi a2, 
a2, -1 ; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-NOZACAS-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NOZACAS-NEXT: lr.w.aqrl a3, (a2) -; RV64IA-NOZACAS-NEXT: sub a5, a3, a1 -; RV64IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-NOZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV64IA-NOZACAS-NEXT: sub a5, a4, a1 +; RV64IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-NOZACAS-NEXT: bnez a5, .LBB89_1 ; RV64IA-NOZACAS-NEXT: # %bb.2: -; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_sub_i16_seq_cst: ; RV32IA-ZACAS: # %bb.0: -; RV32IA-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-ZACAS-NEXT: lui a2, 16 +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-ZACAS-NEXT: lui a3, 16 -; RV32IA-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-ZACAS-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-ZACAS-NEXT: lr.w.aqrl a3, (a2) -; RV32IA-ZACAS-NEXT: sub a5, a3, a1 -; RV32IA-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV32IA-ZACAS-NEXT: sub a5, a4, a1 +; RV32IA-ZACAS-NEXT: xor a5, a4, a5 +; 
RV32IA-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-ZACAS-NEXT: bnez a5, .LBB89_1 ; RV32IA-ZACAS-NEXT: # %bb.2: -; RV32IA-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_sub_i16_seq_cst: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-ZACAS-NEXT: lui a2, 16 +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-ZACAS-NEXT: lui a3, 16 -; RV64IA-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-ZACAS-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w.aqrl a3, (a2) -; RV64IA-ZACAS-NEXT: sub a5, a3, a1 -; RV64IA-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-ZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV64IA-ZACAS-NEXT: sub a5, a4, a1 +; RV64IA-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-ZACAS-NEXT: bnez a5, .LBB89_1 ; RV64IA-ZACAS-NEXT: # %bb.2: -; RV64IA-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_seq_cst: @@ -20454,37 +20454,37 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 -; RV32I-ZALRSC-NEXT: not a3, a4 -; 
RV32I-ZALRSC-NEXT: sll a1, a1, a0 -; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: slli a3, a0, 3 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a4, a2, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: not a2, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a3 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: or a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB90_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) -; RV32I-ZALRSC-NEXT: and a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: and a4, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB90_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a2, a3 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: atomicrmw_and_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: -; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-NOZACAS-NEXT: and a1, a1, a3 -; RV32IA-NOZACAS-NEXT: not a3, a4 -; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-NOZACAS-NEXT: or a1, a1, a3 -; RV32IA-NOZACAS-NEXT: amoand.w a1, a1, (a2) -; RV32IA-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-NOZACAS-NEXT: lui a2, 16 +; RV32IA-NOZACAS-NEXT: slli a3, a0, 3 +; RV32IA-NOZACAS-NEXT: addi a2, a2, -1 +; RV32IA-NOZACAS-NEXT: sll a4, a2, a3 +; RV32IA-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-NOZACAS-NEXT: not a2, a4 +; RV32IA-NOZACAS-NEXT: sll a1, a1, a3 +; RV32IA-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-NOZACAS-NEXT: or a1, a1, a2 +; RV32IA-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-NOZACAS-NEXT: srl a0, a0, a3 ; RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_and_i16_monotonic: @@ -20499,67 +20499,67 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: 
atomicrmw_and_i16_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 -; RV64I-ZALRSC-NEXT: not a3, a4 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 -; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: slli a3, a0, 3 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a2, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: not a2, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a3 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: or a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB90_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) -; RV64I-ZALRSC-NEXT: and a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: and a4, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB90_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a2, a3 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_and_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: -; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-NOZACAS-NEXT: and a1, a1, a3 -; RV64IA-NOZACAS-NEXT: not a3, a4 -; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-NOZACAS-NEXT: or a1, a1, a3 -; RV64IA-NOZACAS-NEXT: amoand.w a1, a1, (a2) -; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-NOZACAS-NEXT: lui a2, 16 +; RV64IA-NOZACAS-NEXT: slli a3, a0, 3 +; RV64IA-NOZACAS-NEXT: addi a2, a2, -1 +; RV64IA-NOZACAS-NEXT: sllw a4, a2, a3 +; RV64IA-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-NOZACAS-NEXT: not a2, a4 +; RV64IA-NOZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-NOZACAS-NEXT: andi a0, a0, -4 +; 
RV64IA-NOZACAS-NEXT: or a1, a1, a2 +; RV64IA-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-NOZACAS-NEXT: srlw a0, a0, a3 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_and_i16_monotonic: ; RV32IA-ZACAS: # %bb.0: -; RV32IA-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-ZACAS-NEXT: lui a3, 16 -; RV32IA-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-ZACAS-NEXT: and a1, a1, a3 -; RV32IA-ZACAS-NEXT: not a3, a4 -; RV32IA-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-ZACAS-NEXT: or a1, a1, a3 -; RV32IA-ZACAS-NEXT: amoand.w a1, a1, (a2) -; RV32IA-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-ZACAS-NEXT: lui a2, 16 +; RV32IA-ZACAS-NEXT: slli a3, a0, 3 +; RV32IA-ZACAS-NEXT: addi a2, a2, -1 +; RV32IA-ZACAS-NEXT: sll a4, a2, a3 +; RV32IA-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-ZACAS-NEXT: not a2, a4 +; RV32IA-ZACAS-NEXT: sll a1, a1, a3 +; RV32IA-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-ZACAS-NEXT: or a1, a1, a2 +; RV32IA-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-ZACAS-NEXT: srl a0, a0, a3 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_and_i16_monotonic: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-ZACAS-NEXT: lui a3, 16 -; RV64IA-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-ZACAS-NEXT: and a1, a1, a3 -; RV64IA-ZACAS-NEXT: not a3, a4 -; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-ZACAS-NEXT: or a1, a1, a3 -; RV64IA-ZACAS-NEXT: amoand.w a1, a1, (a2) -; RV64IA-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-ZACAS-NEXT: lui a2, 16 +; RV64IA-ZACAS-NEXT: slli a3, a0, 3 +; RV64IA-ZACAS-NEXT: addi a2, a2, -1 +; RV64IA-ZACAS-NEXT: sllw a4, a2, a3 +; RV64IA-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-ZACAS-NEXT: not a2, a4 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-ZACAS-NEXT: or a1, a1, a2 +; RV64IA-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-ZACAS-NEXT: srlw a0, a0, a3 ; RV64IA-ZACAS-NEXT: ret ; ; 
RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_monotonic: @@ -20598,52 +20598,52 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_acquire: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 -; RV32I-ZALRSC-NEXT: not a3, a4 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 -; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: slli a3, a0, 3 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a4, a2, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: not a2, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a3 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: or a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB91_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) -; RV32I-ZALRSC-NEXT: and a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: and a4, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB91_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a2, a3 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3 -; RV32IA-WMO-NOZACAS-NEXT: not a3, a4 -; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a3 -; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aq a1, a1, (a2) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-NOZACAS-NEXT: slli a3, a0, 3 +; 
RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 +; RV32IA-WMO-NOZACAS-NEXT: sll a4, a2, a3 +; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: not a2, a4 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a3 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aq a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a3 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i16_acquire: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3 -; RV32IA-TSO-NOZACAS-NEXT: not a3, a4 -; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a3 -; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-NOZACAS-NEXT: slli a3, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 +; RV32IA-TSO-NOZACAS-NEXT: sll a4, a2, a3 +; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: not a2, a4 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a3 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a3 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_and_i16_acquire: @@ -20658,112 +20658,112 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_acquire: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 -; RV64I-ZALRSC-NEXT: not a3, a4 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 -; 
RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: slli a3, a0, 3 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a2, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: not a2, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a3 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: or a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB91_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) -; RV64I-ZALRSC-NEXT: and a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: and a4, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB91_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a2, a3 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a3 -; RV64IA-WMO-NOZACAS-NEXT: not a3, a4 -; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: or a1, a1, a3 -; RV64IA-WMO-NOZACAS-NEXT: amoand.w.aq a1, a1, (a2) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-NOZACAS-NEXT: slli a3, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 +; RV64IA-WMO-NOZACAS-NEXT: sllw a4, a2, a3 +; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: not a2, a4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: or a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: amoand.w.aq a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a3 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_and_i16_acquire: ; RV64IA-TSO-NOZACAS: # %bb.0: -; 
RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a3 -; RV64IA-TSO-NOZACAS-NEXT: not a3, a4 -; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: or a1, a1, a3 -; RV64IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-NOZACAS-NEXT: slli a3, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 +; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a2, a3 +; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: not a2, a4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: or a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a3 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_acquire: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 -; RV32IA-WMO-ZACAS-NEXT: not a3, a4 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a3 -; RV32IA-WMO-ZACAS-NEXT: amoand.w.aq a1, a1, (a2) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a3, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 +; RV32IA-WMO-ZACAS-NEXT: sll a4, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: not a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: amoand.w.aq a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a3 ; RV32IA-WMO-ZACAS-NEXT: 
ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i16_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 -; RV32IA-TSO-ZACAS-NEXT: not a3, a4 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a3 -; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a3, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 +; RV32IA-TSO-ZACAS-NEXT: sll a4, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: not a2, a4 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a3 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_acquire: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 -; RV64IA-WMO-ZACAS-NEXT: not a3, a4 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: or a1, a1, a3 -; RV64IA-WMO-ZACAS-NEXT: amoand.w.aq a1, a1, (a2) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZACAS-NEXT: slli a3, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 +; RV64IA-WMO-ZACAS-NEXT: sllw a4, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: not a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: or a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: amoand.w.aq a0, a1, (a0) +; 
RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a3 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_and_i16_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 -; RV64IA-TSO-ZACAS-NEXT: not a3, a4 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: or a1, a1, a3 -; RV64IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZACAS-NEXT: slli a3, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 +; RV64IA-TSO-ZACAS-NEXT: sllw a4, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: not a2, a4 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: or a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a3 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_acquire: @@ -20802,52 +20802,52 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_release: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 -; RV32I-ZALRSC-NEXT: not a3, a4 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 -; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: slli a3, a0, 3 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a4, a2, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: not a2, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a3 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: or a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB92_1: # 
=>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) -; RV32I-ZALRSC-NEXT: and a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: and a4, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB92_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a2, a3 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3 -; RV32IA-WMO-NOZACAS-NEXT: not a3, a4 -; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a3 -; RV32IA-WMO-NOZACAS-NEXT: amoand.w.rl a1, a1, (a2) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-NOZACAS-NEXT: slli a3, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 +; RV32IA-WMO-NOZACAS-NEXT: sll a4, a2, a3 +; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: not a2, a4 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a3 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: amoand.w.rl a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a3 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i16_release: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3 -; RV32IA-TSO-NOZACAS-NEXT: not a3, a4 -; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a3 -; 
RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-NOZACAS-NEXT: slli a3, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 +; RV32IA-TSO-NOZACAS-NEXT: sll a4, a2, a3 +; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: not a2, a4 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a3 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a3 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_and_i16_release: @@ -20862,112 +20862,112 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_release: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 -; RV64I-ZALRSC-NEXT: not a3, a4 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 -; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: slli a3, a0, 3 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a2, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: not a2, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a3 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: or a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) -; RV64I-ZALRSC-NEXT: and a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: and a4, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB92_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a2, a3 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_release: ; 
RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a3 -; RV64IA-WMO-NOZACAS-NEXT: not a3, a4 -; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: or a1, a1, a3 -; RV64IA-WMO-NOZACAS-NEXT: amoand.w.rl a1, a1, (a2) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-NOZACAS-NEXT: slli a3, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 +; RV64IA-WMO-NOZACAS-NEXT: sllw a4, a2, a3 +; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: not a2, a4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: or a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: amoand.w.rl a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a3 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_and_i16_release: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a3 -; RV64IA-TSO-NOZACAS-NEXT: not a3, a4 -; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: or a1, a1, a3 -; RV64IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-NOZACAS-NEXT: slli a3, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 +; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a2, a3 +; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: not a2, a4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: or a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: 
amoand.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a3 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_release: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 -; RV32IA-WMO-ZACAS-NEXT: not a3, a4 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a3 -; RV32IA-WMO-ZACAS-NEXT: amoand.w.rl a1, a1, (a2) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a3, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 +; RV32IA-WMO-ZACAS-NEXT: sll a4, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: not a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: amoand.w.rl a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a3 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i16_release: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 -; RV32IA-TSO-ZACAS-NEXT: not a3, a4 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a3 -; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a3, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 +; RV32IA-TSO-ZACAS-NEXT: sll a4, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: not a2, a4 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; 
RV32IA-TSO-ZACAS-NEXT: or a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a3 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_release: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 -; RV64IA-WMO-ZACAS-NEXT: not a3, a4 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: or a1, a1, a3 -; RV64IA-WMO-ZACAS-NEXT: amoand.w.rl a1, a1, (a2) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZACAS-NEXT: slli a3, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 +; RV64IA-WMO-ZACAS-NEXT: sllw a4, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: not a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: or a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: amoand.w.rl a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a3 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_and_i16_release: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 -; RV64IA-TSO-ZACAS-NEXT: not a3, a4 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: or a1, a1, a3 -; RV64IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZACAS-NEXT: slli a3, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 +; RV64IA-TSO-ZACAS-NEXT: sllw a4, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: not a2, a4 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, 
a3 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: or a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a3 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_release: @@ -21006,52 +21006,52 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_acq_rel: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 -; RV32I-ZALRSC-NEXT: not a3, a4 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 -; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: slli a3, a0, 3 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a4, a2, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: not a2, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a3 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: or a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) -; RV32I-ZALRSC-NEXT: and a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV32I-ZALRSC-NEXT: and a4, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB93_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a2, a3 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3 -; RV32IA-WMO-NOZACAS-NEXT: not a3, a4 -; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; 
RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a3 -; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a1, (a2) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-NOZACAS-NEXT: slli a3, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 +; RV32IA-WMO-NOZACAS-NEXT: sll a4, a2, a3 +; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: not a2, a4 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a3 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a3 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i16_acq_rel: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3 -; RV32IA-TSO-NOZACAS-NEXT: not a3, a4 -; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a3 -; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-NOZACAS-NEXT: slli a3, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 +; RV32IA-TSO-NOZACAS-NEXT: sll a4, a2, a3 +; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: not a2, a4 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a3 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a3 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_and_i16_acq_rel: @@ -21066,112 +21066,112 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_acq_rel: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 
-; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 -; RV64I-ZALRSC-NEXT: not a3, a4 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 -; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: slli a3, a0, 3 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a2, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: not a2, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a3 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: or a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) -; RV64I-ZALRSC-NEXT: and a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aq a2, (a0) +; RV64I-ZALRSC-NEXT: and a4, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB93_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a2, a3 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a3 -; RV64IA-WMO-NOZACAS-NEXT: not a3, a4 -; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: or a1, a1, a3 -; RV64IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a1, (a2) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-NOZACAS-NEXT: slli a3, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 +; RV64IA-WMO-NOZACAS-NEXT: sllw a4, a2, a3 +; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: not a2, a4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: or a1, 
a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a3 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_and_i16_acq_rel: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a3 -; RV64IA-TSO-NOZACAS-NEXT: not a3, a4 -; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: or a1, a1, a3 -; RV64IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-NOZACAS-NEXT: slli a3, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 +; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a2, a3 +; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: not a2, a4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: or a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a3 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_acq_rel: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 -; RV32IA-WMO-ZACAS-NEXT: not a3, a4 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a3 -; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a1, (a2) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a3, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 +; RV32IA-WMO-ZACAS-NEXT: sll a4, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: not a2, a4 +; 
RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a3 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i16_acq_rel: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 -; RV32IA-TSO-ZACAS-NEXT: not a3, a4 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a3 -; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a3, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 +; RV32IA-TSO-ZACAS-NEXT: sll a4, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: not a2, a4 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a3 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_acq_rel: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 -; RV64IA-WMO-ZACAS-NEXT: not a3, a4 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: or a1, a1, a3 -; RV64IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a1, (a2) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZACAS-NEXT: slli a3, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 +; RV64IA-WMO-ZACAS-NEXT: sllw a4, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: and a1, 
a1, a2 +; RV64IA-WMO-ZACAS-NEXT: not a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: or a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a3 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_and_i16_acq_rel: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 -; RV64IA-TSO-ZACAS-NEXT: not a3, a4 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: or a1, a1, a3 -; RV64IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZACAS-NEXT: slli a3, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 +; RV64IA-TSO-ZACAS-NEXT: sllw a4, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: not a2, a4 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: or a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a3 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_acq_rel: @@ -21210,52 +21210,52 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_seq_cst: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 -; RV32I-ZALRSC-NEXT: not a3, a4 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 -; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: slli a3, a0, 3 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a4, 
a2, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: not a2, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a3 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: or a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) -; RV32I-ZALRSC-NEXT: and a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV32I-ZALRSC-NEXT: and a4, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB94_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a2, a3 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3 -; RV32IA-WMO-NOZACAS-NEXT: not a3, a4 -; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a3 -; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a1, (a2) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-NOZACAS-NEXT: slli a3, a0, 3 +; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 +; RV32IA-WMO-NOZACAS-NEXT: sll a4, a2, a3 +; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: not a2, a4 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a3 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a3 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i16_seq_cst: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 
-; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3 -; RV32IA-TSO-NOZACAS-NEXT: not a3, a4 -; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a3 -; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-NOZACAS-NEXT: slli a3, a0, 3 +; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 +; RV32IA-TSO-NOZACAS-NEXT: sll a4, a2, a3 +; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: not a2, a4 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a3 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a3 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_and_i16_seq_cst: @@ -21270,112 +21270,112 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_seq_cst: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 -; RV64I-ZALRSC-NEXT: not a3, a4 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 -; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: slli a3, a0, 3 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a2, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: not a2, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a3 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: or a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) -; RV64I-ZALRSC-NEXT: and a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aqrl a2, (a0) +; RV64I-ZALRSC-NEXT: and a4, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; 
RV64I-ZALRSC-NEXT: bnez a4, .LBB94_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a2, a3 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a3 -; RV64IA-WMO-NOZACAS-NEXT: not a3, a4 -; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: or a1, a1, a3 -; RV64IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a1, (a2) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-NOZACAS-NEXT: slli a3, a0, 3 +; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 +; RV64IA-WMO-NOZACAS-NEXT: sllw a4, a2, a3 +; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: not a2, a4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: or a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a3 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_and_i16_seq_cst: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a3 -; RV64IA-TSO-NOZACAS-NEXT: not a3, a4 -; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: or a1, a1, a3 -; RV64IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-NOZACAS-NEXT: slli a3, a0, 3 +; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 +; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a2, a3 +; 
RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: not a2, a4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: or a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a3 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_seq_cst: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 -; RV32IA-WMO-ZACAS-NEXT: not a3, a4 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a3 -; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a1, (a2) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a3, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 +; RV32IA-WMO-ZACAS-NEXT: sll a4, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: not a2, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a3 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i16_seq_cst: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 -; RV32IA-TSO-ZACAS-NEXT: not a3, a4 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a3 -; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a3, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: 
addi a2, a2, -1 +; RV32IA-TSO-ZACAS-NEXT: sll a4, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: not a2, a4 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a3 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_seq_cst: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 -; RV64IA-WMO-ZACAS-NEXT: not a3, a4 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: or a1, a1, a3 -; RV64IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a1, (a2) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZACAS-NEXT: slli a3, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 +; RV64IA-WMO-ZACAS-NEXT: sllw a4, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: not a2, a4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: or a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a3 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_and_i16_seq_cst: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 -; RV64IA-TSO-ZACAS-NEXT: not a3, a4 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: or a1, a1, a3 -; RV64IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 +; 
RV64IA-TSO-ZACAS-NEXT: slli a3, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 +; RV64IA-TSO-ZACAS-NEXT: sllw a4, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: not a2, a4 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: or a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: amoand.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a3 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_seq_cst: @@ -21414,46 +21414,46 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) -; RV32I-ZALRSC-NEXT: and a5, a3, a1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a3) +; RV32I-ZALRSC-NEXT: and a5, a4, a1 ; RV32I-ZALRSC-NEXT: not a5, a5 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB95_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: -; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-NOZACAS-NEXT: lui a2, 16 +; 
RV32IA-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-NOZACAS-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-NOZACAS-NEXT: and a5, a3, a1 +; RV32IA-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-NOZACAS-NEXT: and a5, a4, a1 ; RV32IA-NOZACAS-NEXT: not a5, a5 -; RV32IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-NOZACAS-NEXT: bnez a5, .LBB95_1 ; RV32IA-NOZACAS-NEXT: # %bb.2: -; RV32IA-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_nand_i16_monotonic: @@ -21468,178 +21468,178 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) -; RV64I-ZALRSC-NEXT: and a5, a3, a1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a3) +; RV64I-ZALRSC-NEXT: 
and a5, a4, a1 ; RV64I-ZALRSC-NEXT: not a5, a5 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB95_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: -; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-NOZACAS-NEXT: lui a2, 16 +; RV64IA-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-NOZACAS-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-NOZACAS-NEXT: and a5, a3, a1 +; RV64IA-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-NOZACAS-NEXT: and a5, a4, a1 ; RV64IA-NOZACAS-NEXT: not a5, a5 -; RV64IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-NOZACAS-NEXT: bnez a5, .LBB95_1 ; RV64IA-NOZACAS-NEXT: # %bb.2: -; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_nand_i16_monotonic: ; RV32IA-ZACAS: # %bb.0: -; RV32IA-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-ZACAS-NEXT: lui a2, 16 +; RV32IA-ZACAS-NEXT: andi 
a3, a0, -4 +; RV32IA-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-ZACAS-NEXT: lui a3, 16 -; RV32IA-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-ZACAS-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-ZACAS-NEXT: lr.w a3, (a2) -; RV32IA-ZACAS-NEXT: and a5, a3, a1 +; RV32IA-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-ZACAS-NEXT: and a5, a4, a1 ; RV32IA-ZACAS-NEXT: not a5, a5 -; RV32IA-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-ZACAS-NEXT: bnez a5, .LBB95_1 ; RV32IA-ZACAS-NEXT: # %bb.2: -; RV32IA-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_nand_i16_monotonic: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-ZACAS-NEXT: lui a2, 16 +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-ZACAS-NEXT: lui a3, 16 -; RV64IA-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-ZACAS-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w a3, (a2) -; RV64IA-ZACAS-NEXT: and a5, a3, a1 +; RV64IA-ZACAS-NEXT: lr.w a4, (a3) +; RV64IA-ZACAS-NEXT: and a5, a4, a1 ; RV64IA-ZACAS-NEXT: not a5, a5 -; RV64IA-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-ZACAS-NEXT: xor 
a5, a4, a5 +; RV64IA-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-ZACAS-NEXT: bnez a5, .LBB95_1 ; RV64IA-ZACAS-NEXT: # %bb.2: -; RV64IA-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic: ; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0: -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a3, a1 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a4, a1 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB95_1 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2: -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret ; ; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic: ; 
RV32IA-TSO-ZABHA-NOZACAS: # %bb.0: -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a3, a1 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a4, a1 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB95_1 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2: -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret ; ; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic: ; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0: -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: addi a3, a3, 
-1 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a3, a1 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a4, a1 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB95_1 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2: -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: ret ; ; RV64IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic: ; RV64IA-TSO-ZABHA-NOZACAS: # %bb.0: -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: 
lr.w a3, (a2) -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a3, a1 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a4, a1 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB95_1 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2: -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_monotonic: @@ -21718,68 +21718,68 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_acquire: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) -; RV32I-ZALRSC-NEXT: and a5, a3, a1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a3) +; RV32I-ZALRSC-NEXT: and a5, a4, a1 ; RV32I-ZALRSC-NEXT: not a5, a5 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; 
RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB96_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-NOZACAS-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2) -; RV32IA-WMO-NOZACAS-NEXT: and a5, a3, a1 +; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a3) +; RV32IA-WMO-NOZACAS-NEXT: and a5, a4, a1 ; RV32IA-WMO-NOZACAS-NEXT: not a5, a5 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB96_1 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_nand_i16_acquire: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; 
RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-NOZACAS-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-NOZACAS-NEXT: and a5, a3, a1 +; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-NOZACAS-NEXT: and a5, a4, a1 ; RV32IA-TSO-NOZACAS-NEXT: not a5, a5 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB96_1 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_nand_i16_acquire: @@ -21794,244 +21794,244 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_acquire: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) -; RV64I-ZALRSC-NEXT: and a5, a3, a1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a3) +; RV64I-ZALRSC-NEXT: and a5, a4, a1 
; RV64I-ZALRSC-NEXT: not a5, a5 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB96_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NOZACAS-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2) -; RV64IA-WMO-NOZACAS-NEXT: and a5, a3, a1 +; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a3) +; RV64IA-WMO-NOZACAS-NEXT: and a5, a4, a1 ; RV64IA-WMO-NOZACAS-NEXT: not a5, a5 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-WMO-NOZACAS-NEXT: bnez a5, .LBB96_1 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.2: -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: 
atomicrmw_nand_i16_acquire: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NOZACAS-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-NOZACAS-NEXT: and a5, a3, a1 +; RV64IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-NOZACAS-NEXT: and a5, a4, a1 ; RV64IA-TSO-NOZACAS-NEXT: not a5, a5 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-NOZACAS-NEXT: bnez a5, .LBB96_1 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.2: -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_nand_i16_acquire: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZACAS-NEXT: 
.LBB96_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2) -; RV32IA-WMO-ZACAS-NEXT: and a5, a3, a1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a5, a4, a1 ; RV32IA-WMO-ZACAS-NEXT: not a5, a5 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB96_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: -; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_nand_i16_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZACAS-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-ZACAS-NEXT: and a5, a3, a1 +; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-ZACAS-NEXT: and a5, a4, a1 ; RV32IA-TSO-ZACAS-NEXT: not a5, a5 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB96_1 ; 
RV32IA-TSO-ZACAS-NEXT: # %bb.2: -; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_nand_i16_acquire: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZACAS-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2) -; RV64IA-WMO-ZACAS-NEXT: and a5, a3, a1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a5, a4, a1 ; RV64IA-WMO-ZACAS-NEXT: not a5, a5 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB96_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_nand_i16_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a1, 
a1, a2 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZACAS-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-ZACAS-NEXT: and a5, a3, a1 +; RV64IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-ZACAS-NEXT: and a5, a4, a1 ; RV64IA-TSO-ZACAS-NEXT: not a5, a5 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB96_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acquire: ; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0: -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aq a3, (a2) -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a3, a1 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aq a4, (a3) +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a4, a1 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a4 -; 
RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB96_1 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2: -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret ; ; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acquire: ; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0: -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a3, a1 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a4, a1 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB96_1 ; 
RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2: -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret ; ; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acquire: ; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0: -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aq a3, (a2) -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a3, a1 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aq a4, (a3) +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a4, a1 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB96_1 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2: -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: ret ; ; RV64IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acquire: ; RV64IA-TSO-ZABHA-NOZACAS: # %bb.0: -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: 
lui a2, 16 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a3, a1 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a4, a1 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB96_1 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2: -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_acquire: @@ -22110,68 +22110,68 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_release: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, 
a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) -; RV32I-ZALRSC-NEXT: and a5, a3, a1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a3) +; RV32I-ZALRSC-NEXT: and a5, a4, a1 ; RV32I-ZALRSC-NEXT: not a5, a5 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB97_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-NOZACAS-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-WMO-NOZACAS-NEXT: and a5, a3, a1 +; RV32IA-WMO-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-WMO-NOZACAS-NEXT: and a5, a4, a1 ; RV32IA-WMO-NOZACAS-NEXT: not a5, a5 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; 
RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB97_1 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_nand_i16_release: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-NOZACAS-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-NOZACAS-NEXT: and a5, a3, a1 +; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-NOZACAS-NEXT: and a5, a4, a1 ; RV32IA-TSO-NOZACAS-NEXT: not a5, a5 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB97_1 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_nand_i16_release: @@ -22186,244 +22186,244 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_release: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 
16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) -; RV64I-ZALRSC-NEXT: and a5, a3, a1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a3) +; RV64I-ZALRSC-NEXT: and a5, a4, a1 ; RV64I-ZALRSC-NEXT: not a5, a5 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB97_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NOZACAS-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-WMO-NOZACAS-NEXT: and a5, a3, a1 +; RV64IA-WMO-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-WMO-NOZACAS-NEXT: and a5, a4, a1 ; RV64IA-WMO-NOZACAS-NEXT: not a5, a5 -; 
RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-NOZACAS-NEXT: bnez a5, .LBB97_1 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.2: -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_nand_i16_release: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NOZACAS-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-NOZACAS-NEXT: and a5, a3, a1 +; RV64IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-NOZACAS-NEXT: and a5, a4, a1 ; RV64IA-TSO-NOZACAS-NEXT: not a5, a5 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-NOZACAS-NEXT: bnez a5, .LBB97_1 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.2: -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; 
RV32IA-WMO-ZACAS-LABEL: atomicrmw_nand_i16_release: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZACAS-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w a3, (a2) -; RV32IA-WMO-ZACAS-NEXT: and a5, a3, a1 +; RV32IA-WMO-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a5, a4, a1 ; RV32IA-WMO-ZACAS-NEXT: not a5, a5 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB97_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: -; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_nand_i16_release: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZACAS-NEXT: .LBB97_1: # =>This Inner Loop Header: 
Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-ZACAS-NEXT: and a5, a3, a1 +; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-ZACAS-NEXT: and a5, a4, a1 ; RV32IA-TSO-ZACAS-NEXT: not a5, a5 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB97_1 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: -; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_nand_i16_release: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZACAS-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w a3, (a2) -; RV64IA-WMO-ZACAS-NEXT: and a5, a3, a1 +; RV64IA-WMO-ZACAS-NEXT: lr.w a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a5, a4, a1 ; RV64IA-WMO-ZACAS-NEXT: not a5, a5 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB97_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: -; 
RV64IA-WMO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_nand_i16_release: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZACAS-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-ZACAS-NEXT: and a5, a3, a1 +; RV64IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-ZACAS-NEXT: and a5, a4, a1 ; RV64IA-TSO-ZACAS-NEXT: not a5, a5 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB97_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_release: ; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0: -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: 
and a1, a1, a3 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a3, a1 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a4, a1 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB97_1 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2: -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret ; ; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_release: ; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0: -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a3, a1 +; 
RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a4, a1 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB97_1 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2: -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret ; ; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_release: ; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0: -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a3, a1 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a4, a1 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; 
RV64IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB97_1 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2: -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: ret ; ; RV64IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_release: ; RV64IA-TSO-ZABHA-NOZACAS: # %bb.0: -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a3, a1 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a4, a1 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB97_1 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2: -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a3, a0 +; 
RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_release: @@ -22502,68 +22502,68 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_acq_rel: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) -; RV32I-ZALRSC-NEXT: and a5, a3, a1 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a3) +; RV32I-ZALRSC-NEXT: and a5, a4, a1 ; RV32I-ZALRSC-NEXT: not a5, a5 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB98_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; 
RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-NOZACAS-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2) -; RV32IA-WMO-NOZACAS-NEXT: and a5, a3, a1 +; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a3) +; RV32IA-WMO-NOZACAS-NEXT: and a5, a4, a1 ; RV32IA-WMO-NOZACAS-NEXT: not a5, a5 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB98_1 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-NOZACAS-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-NOZACAS-NEXT: and a5, a3, a1 +; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-NOZACAS-NEXT: and a5, a4, a1 ; RV32IA-TSO-NOZACAS-NEXT: not a5, a5 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; 
RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB98_1 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_nand_i16_acq_rel: @@ -22578,244 +22578,244 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_acq_rel: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) -; RV64I-ZALRSC-NEXT: and a5, a3, a1 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a3) +; RV64I-ZALRSC-NEXT: and a5, a4, a1 ; RV64I-ZALRSC-NEXT: not a5, a5 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB98_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: 
addi a2, a2, -1 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NOZACAS-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2) -; RV64IA-WMO-NOZACAS-NEXT: and a5, a3, a1 +; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a3) +; RV64IA-WMO-NOZACAS-NEXT: and a5, a4, a1 ; RV64IA-WMO-NOZACAS-NEXT: not a5, a5 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-NOZACAS-NEXT: bnez a5, .LBB98_1 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.2: -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NOZACAS-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-NOZACAS-NEXT: and a5, a3, a1 +; RV64IA-TSO-NOZACAS-NEXT: lr.w a4, 
(a3) +; RV64IA-TSO-NOZACAS-NEXT: and a5, a4, a1 ; RV64IA-TSO-NOZACAS-NEXT: not a5, a5 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-NOZACAS-NEXT: bnez a5, .LBB98_1 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.2: -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_nand_i16_acq_rel: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZACAS-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2) -; RV32IA-WMO-ZACAS-NEXT: and a5, a3, a1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a5, a4, a1 ; RV32IA-WMO-ZACAS-NEXT: not a5, a5 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB98_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: -; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0 ; 
RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_nand_i16_acq_rel: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZACAS-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-ZACAS-NEXT: and a5, a3, a1 +; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-ZACAS-NEXT: and a5, a4, a1 ; RV32IA-TSO-ZACAS-NEXT: not a5, a5 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB98_1 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: -; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_nand_i16_acq_rel: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZACAS-NEXT: .LBB98_1: # 
=>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2) -; RV64IA-WMO-ZACAS-NEXT: and a5, a3, a1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a5, a4, a1 ; RV64IA-WMO-ZACAS-NEXT: not a5, a5 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB98_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_nand_i16_acq_rel: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZACAS-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-ZACAS-NEXT: and a5, a3, a1 +; RV64IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-ZACAS-NEXT: and a5, a4, a1 ; RV64IA-TSO-ZACAS-NEXT: not a5, a5 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB98_1 ; 
RV64IA-TSO-ZACAS-NEXT: # %bb.2: -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel: ; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0: -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aq a3, (a2) -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a3, a1 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aq a4, (a3) +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a4, a1 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB98_1 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2: -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret ; ; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel: ; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0: -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lui a2, 16 +; 
RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a3, a1 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a4, a1 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB98_1 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2: -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret ; ; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel: ; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0: -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: 
and a1, a1, a2 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aq a3, (a2) -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a3, a1 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aq a4, (a3) +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a4, a1 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB98_1 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2: -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: ret ; ; RV64IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel: ; RV64IA-TSO-ZABHA-NOZACAS: # %bb.0: -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a3, a1 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a4, (a3) +; 
RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a4, a1 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB98_1 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2: -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_acq_rel: @@ -22894,46 +22894,46 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_seq_cst: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) -; RV32I-ZALRSC-NEXT: and a5, a3, a1 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a3) +; RV32I-ZALRSC-NEXT: and a5, a4, a1 ; RV32I-ZALRSC-NEXT: not a5, a5 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB99_1 ; 
RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: -; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-NOZACAS-NEXT: lui a2, 16 +; RV32IA-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-NOZACAS-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NOZACAS-NEXT: lr.w.aqrl a3, (a2) -; RV32IA-NOZACAS-NEXT: and a5, a3, a1 +; RV32IA-NOZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV32IA-NOZACAS-NEXT: and a5, a4, a1 ; RV32IA-NOZACAS-NEXT: not a5, a5 -; RV32IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-NOZACAS-NEXT: bnez a5, .LBB99_1 ; RV32IA-NOZACAS-NEXT: # %bb.2: -; RV32IA-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_nand_i16_seq_cst: @@ -22948,178 +22948,178 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_seq_cst: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; 
RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) -; RV64I-ZALRSC-NEXT: and a5, a3, a1 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a3) +; RV64I-ZALRSC-NEXT: and a5, a4, a1 ; RV64I-ZALRSC-NEXT: not a5, a5 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB99_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: -; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-NOZACAS-NEXT: lui a2, 16 +; RV64IA-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-NOZACAS-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NOZACAS-NEXT: lr.w.aqrl a3, (a2) -; RV64IA-NOZACAS-NEXT: and a5, a3, a1 +; RV64IA-NOZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV64IA-NOZACAS-NEXT: and a5, a4, a1 ; RV64IA-NOZACAS-NEXT: not a5, a5 -; RV64IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; 
RV64IA-NOZACAS-NEXT: bnez a5, .LBB99_1 ; RV64IA-NOZACAS-NEXT: # %bb.2: -; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_nand_i16_seq_cst: ; RV32IA-ZACAS: # %bb.0: -; RV32IA-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-ZACAS-NEXT: lui a2, 16 +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-ZACAS-NEXT: lui a3, 16 -; RV32IA-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-ZACAS-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-ZACAS-NEXT: lr.w.aqrl a3, (a2) -; RV32IA-ZACAS-NEXT: and a5, a3, a1 +; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV32IA-ZACAS-NEXT: and a5, a4, a1 ; RV32IA-ZACAS-NEXT: not a5, a5 -; RV32IA-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-ZACAS-NEXT: xor a5, a3, a5 -; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-ZACAS-NEXT: xor a5, a4, a5 +; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-ZACAS-NEXT: bnez a5, .LBB99_1 ; RV32IA-ZACAS-NEXT: # %bb.2: -; RV32IA-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_nand_i16_seq_cst: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-ZACAS-NEXT: lui a2, 16 +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-ZACAS-NEXT: lui a3, 16 -; RV64IA-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-ZACAS-NEXT: .LBB99_1: # =>This Inner Loop 
Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w.aqrl a3, (a2) -; RV64IA-ZACAS-NEXT: and a5, a3, a1 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV64IA-ZACAS-NEXT: and a5, a4, a1 ; RV64IA-ZACAS-NEXT: not a5, a5 -; RV64IA-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-ZACAS-NEXT: xor a5, a3, a5 -; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-ZACAS-NEXT: xor a5, a4, a5 +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-ZACAS-NEXT: bnez a5, .LBB99_1 ; RV64IA-ZACAS-NEXT: # %bb.2: -; RV64IA-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst: ; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0: -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aqrl a3, (a2) -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a3, a1 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a4, a1 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a2 +; 
RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB99_1 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2: -; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret ; ; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst: ; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0: -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w.aqrl a3, (a2) -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a3, a1 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a4, a1 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB99_1 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2: -; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret ; ; 
RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst: ; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0: -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aqrl a3, (a2) -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a3, a1 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a4, a1 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB99_1 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2: -; RV64IA-WMO-ZABHA-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-ZABHA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT: ret ; ; RV64IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst: ; RV64IA-TSO-ZABHA-NOZACAS: # %bb.0: -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3 -; 
RV64IA-TSO-ZABHA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: lr.w.aqrl a3, (a2) -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a3, a1 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a4, a1 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5 -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB99_1 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2: -; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_seq_cst: @@ -23129,8 +23129,8 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB99_1: # %atomicrmw.start ; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1 -; RV32IA-WMO-ZABHA-ZACAS-NEXT: fence rw, rw ; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3 +; RV32IA-WMO-ZABHA-ZACAS-NEXT: fence rw, rw ; RV32IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 16 ; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.h.aqrl a0, a3, (a2) ; RV32IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 16 @@ -23145,8 +23145,8 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind { 
; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB99_1: # %atomicrmw.start ; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1 -; RV32IA-TSO-ZABHA-ZACAS-NEXT: fence rw, rw ; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3 +; RV32IA-TSO-ZABHA-ZACAS-NEXT: fence rw, rw ; RV32IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 16 ; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.h a0, a3, (a2) ; RV32IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 16 @@ -23161,8 +23161,8 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-ZABHA-ZACAS-NEXT: .LBB99_1: # %atomicrmw.start ; RV64IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1 -; RV64IA-WMO-ZABHA-ZACAS-NEXT: fence rw, rw ; RV64IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3 +; RV64IA-WMO-ZABHA-ZACAS-NEXT: fence rw, rw ; RV64IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 48 ; RV64IA-WMO-ZABHA-ZACAS-NEXT: amocas.h.aqrl a0, a3, (a2) ; RV64IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 48 @@ -23177,8 +23177,8 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-ZABHA-ZACAS-NEXT: .LBB99_1: # %atomicrmw.start ; RV64IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1 -; RV64IA-TSO-ZABHA-ZACAS-NEXT: fence rw, rw ; RV64IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3 +; RV64IA-TSO-ZABHA-ZACAS-NEXT: fence rw, rw ; RV64IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 48 ; RV64IA-TSO-ZABHA-ZACAS-NEXT: amocas.h a0, a3, (a2) ; RV64IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 48 @@ -23202,29 +23202,29 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: srli a1, a1, 16 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll 
a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) ; RV32I-ZALRSC-NEXT: or a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB100_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: atomicrmw_or_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: -; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-NOZACAS-NEXT: srli a1, a1, 16 -; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-NOZACAS-NEXT: amoor.w a1, a1, (a2) -; RV32IA-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_or_i16_monotonic: @@ -23239,51 +23239,51 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: srli a1, a1, 48 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) ; RV64I-ZALRSC-NEXT: or a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB100_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: 
atomicrmw_or_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: -; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-NOZACAS-NEXT: srli a1, a1, 48 -; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-NOZACAS-NEXT: amoor.w a1, a1, (a2) -; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_or_i16_monotonic: ; RV32IA-ZACAS: # %bb.0: -; RV32IA-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-ZACAS-NEXT: srli a1, a1, 16 -; RV32IA-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-ZACAS-NEXT: amoor.w a1, a1, (a2) -; RV32IA-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_or_i16_monotonic: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-ZACAS-NEXT: srli a1, a1, 48 -; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-ZACAS-NEXT: amoor.w a1, a1, (a2) -; RV64IA-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_monotonic: @@ -23322,40 +23322,40 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_acquire: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; 
RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: srli a1, a1, 16 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV32I-ZALRSC-NEXT: or a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB101_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: srli a1, a1, 16 -; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aq a1, a1, (a2) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aq a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i16_acquire: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: srli a1, a1, 16 -; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_or_i16_acquire: @@ 
-23370,84 +23370,84 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_acquire: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: srli a1, a1, 48 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV64I-ZALRSC-NEXT: or a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB101_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: srli a1, a1, 48 -; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: amoor.w.aq a1, a1, (a2) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: amoor.w.aq a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_or_i16_acquire: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: srli a1, a1, 48 -; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; 
RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_acquire: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: srli a1, a1, 16 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: amoor.w.aq a1, a1, (a2) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: amoor.w.aq a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i16_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: srli a1, a1, 16 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_acquire: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: srli a1, a1, 48 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: amoor.w.aq a1, a1, (a2) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: amoor.w.aq a0, 
a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_or_i16_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: srli a1, a1, 48 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_acquire: @@ -23486,40 +23486,40 @@ define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_release: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: srli a1, a1, 16 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB102_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) ; RV32I-ZALRSC-NEXT: or a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB102_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: srli a1, a1, 16 -; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; 
RV32IA-WMO-NOZACAS-NEXT: amoor.w.rl a1, a1, (a2) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: amoor.w.rl a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i16_release: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: srli a1, a1, 16 -; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_or_i16_release: @@ -23534,84 +23534,84 @@ define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_release: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: srli a1, a1, 48 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB102_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) ; RV64I-ZALRSC-NEXT: or a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB102_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: -; 
RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: srli a1, a1, 48 -; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: amoor.w.rl a1, a1, (a2) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: amoor.w.rl a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_or_i16_release: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: srli a1, a1, 48 -; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_release: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: srli a1, a1, 16 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: amoor.w.rl a1, a1, (a2) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: amoor.w.rl a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i16_release: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli 
a0, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: srli a1, a1, 16 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_release: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: srli a1, a1, 48 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: amoor.w.rl a1, a1, (a2) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: amoor.w.rl a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_or_i16_release: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: srli a1, a1, 48 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_release: @@ -23650,40 +23650,40 @@ define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_acq_rel: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, 
a0, 3 ; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: srli a1, a1, 16 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB103_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV32I-ZALRSC-NEXT: or a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB103_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: srli a1, a1, 16 -; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a1, (a2) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i16_acq_rel: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: srli a1, a1, 16 -; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: 
atomicrmw_or_i16_acq_rel: @@ -23698,84 +23698,84 @@ define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_acq_rel: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: srli a1, a1, 48 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB103_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV64I-ZALRSC-NEXT: or a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB103_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: srli a1, a1, 48 -; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a1, (a2) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_or_i16_acq_rel: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: srli a1, a1, 48 -; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2) -; 
RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_acq_rel: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: srli a1, a1, 16 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a1, (a2) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i16_acq_rel: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: srli a1, a1, 16 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_acq_rel: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: srli a1, a1, 48 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a1, (a2) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw 
a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_or_i16_acq_rel: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: srli a1, a1, 48 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_acq_rel: @@ -23814,40 +23814,40 @@ define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_seq_cst: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: srli a1, a1, 16 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a0) ; RV32I-ZALRSC-NEXT: or a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB104_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: 
srli a1, a1, 16 -; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a1, (a2) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i16_seq_cst: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: srli a1, a1, 16 -; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_or_i16_seq_cst: @@ -23862,84 +23862,84 @@ define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_seq_cst: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: srli a1, a1, 48 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a0) ; RV64I-ZALRSC-NEXT: or a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB104_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: 
atomicrmw_or_i16_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: srli a1, a1, 48 -; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a1, (a2) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_or_i16_seq_cst: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: srli a1, a1, 48 -; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_seq_cst: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: srli a1, a1, 16 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a1, (a2) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i16_seq_cst: ; RV32IA-TSO-ZACAS: # %bb.0: -; 
RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: srli a1, a1, 16 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_seq_cst: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: srli a1, a1, 48 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a1, (a2) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_or_i16_seq_cst: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: srli a1, a1, 48 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: amoor.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_seq_cst: @@ -23978,29 +23978,29 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_monotonic: ; 
RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: srli a1, a1, 16 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) ; RV32I-ZALRSC-NEXT: xor a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB105_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: atomicrmw_xor_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: -; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-NOZACAS-NEXT: srli a1, a1, 16 -; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-NOZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV32IA-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-NOZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV32IA-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xor_i16_monotonic: @@ -24015,51 +24015,51 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: srli a1, a1, 48 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) ; 
RV64I-ZALRSC-NEXT: xor a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB105_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_xor_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: -; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-NOZACAS-NEXT: srli a1, a1, 48 -; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-NOZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-NOZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV64IA-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_xor_i16_monotonic: ; RV32IA-ZACAS: # %bb.0: -; RV32IA-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-ZACAS-NEXT: srli a1, a1, 16 -; RV32IA-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-ZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV32IA-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-ZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV32IA-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_xor_i16_monotonic: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-ZACAS-NEXT: srli a1, a1, 48 -; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-ZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV64IA-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-ZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV64IA-ZACAS-NEXT: srlw a0, a0, a2 ; 
RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_monotonic: @@ -24098,40 +24098,40 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_acquire: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: srli a1, a1, 16 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB106_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV32I-ZALRSC-NEXT: xor a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB106_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: srli a1, a1, 16 -; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.aq a1, a1, (a2) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.aq a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i16_acquire: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: srli a1, a1, 16 -; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; 
RV32IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xor_i16_acquire: @@ -24146,84 +24146,84 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_acquire: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: srli a1, a1, 48 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB106_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV64I-ZALRSC-NEXT: xor a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB106_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: srli a1, a1, 48 -; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: amoxor.w.aq a1, a1, (a2) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: amoxor.w.aq a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i16_acquire: ; RV64IA-TSO-NOZACAS: # %bb.0: -; 
RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: srli a1, a1, 48 -; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_acquire: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: srli a1, a1, 16 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: amoxor.w.aq a1, a1, (a2) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: amoxor.w.aq a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i16_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: srli a1, a1, 16 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_acquire: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; 
RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: srli a1, a1, 48 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: amoxor.w.aq a1, a1, (a2) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: amoxor.w.aq a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_xor_i16_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: srli a1, a1, 48 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_acquire: @@ -24262,40 +24262,40 @@ define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_release: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: srli a1, a1, 16 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB107_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) ; RV32I-ZALRSC-NEXT: xor a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB107_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: 
srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: srli a1, a1, 16 -; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.rl a1, a1, (a2) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.rl a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i16_release: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: srli a1, a1, 16 -; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xor_i16_release: @@ -24310,84 +24310,84 @@ define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_release: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: srli a1, a1, 48 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB107_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) 
; RV64I-ZALRSC-NEXT: xor a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB107_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: srli a1, a1, 48 -; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: amoxor.w.rl a1, a1, (a2) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: amoxor.w.rl a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i16_release: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: srli a1, a1, 48 -; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_release: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: srli a1, a1, 16 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: amoxor.w.rl a1, a1, (a2) -; 
RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: amoxor.w.rl a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i16_release: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: srli a1, a1, 16 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_release: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: srli a1, a1, 48 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: amoxor.w.rl a1, a1, (a2) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: amoxor.w.rl a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_xor_i16_release: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: srli a1, a1, 48 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 
+; RV64IA-TSO-ZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_release: @@ -24426,40 +24426,40 @@ define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_acq_rel: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: srli a1, a1, 16 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV32I-ZALRSC-NEXT: xor a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB108_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: srli a1, a1, 16 -; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.aqrl a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i16_acq_rel: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; 
RV32IA-TSO-NOZACAS-NEXT: srli a1, a1, 16 -; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xor_i16_acq_rel: @@ -24474,84 +24474,84 @@ define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_acq_rel: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: srli a1, a1, 48 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a0) ; RV64I-ZALRSC-NEXT: xor a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB108_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: srli a1, a1, 48 -; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: amoxor.w.aqrl a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: 
ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i16_acq_rel: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: srli a1, a1, 48 -; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_acq_rel: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: srli a1, a1, 16 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: amoxor.w.aqrl a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i16_acq_rel: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: srli a1, a1, 16 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_acq_rel: ; RV64IA-WMO-ZACAS: # %bb.0: -; 
RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: srli a1, a1, 48 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: amoxor.w.aqrl a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_xor_i16_acq_rel: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: srli a1, a1, 48 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_acq_rel: @@ -24590,40 +24590,40 @@ define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_seq_cst: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: srli a1, a1, 16 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a0) ; RV32I-ZALRSC-NEXT: xor a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, 
.LBB109_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_seq_cst: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: srli a1, a1, 16 -; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2) -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.aqrl a0, a1, (a0) +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i16_seq_cst: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: srli a1, a1, 16 -; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xor_i16_seq_cst: @@ -24638,84 +24638,84 @@ define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_seq_cst: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: srli a1, a1, 48 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB109_1: # 
=>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a0) ; RV64I-ZALRSC-NEXT: xor a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w.rl a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB109_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_seq_cst: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: srli a1, a1, 48 -; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2) -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: amoxor.w.aqrl a0, a1, (a0) +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i16_seq_cst: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-NOZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: srli a1, a1, 48 -; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-NOZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_seq_cst: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; 
RV32IA-WMO-ZACAS-NEXT: srli a1, a1, 16 -; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2) -; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: amoxor.w.aqrl a0, a1, (a0) +; RV32IA-WMO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i16_seq_cst: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: srli a1, a1, 16 -; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2) -; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV32IA-TSO-ZACAS-NEXT: srl a0, a0, a2 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_seq_cst: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: srli a1, a1, 48 -; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2) -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: amoxor.w.aqrl a0, a1, (a0) +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_xor_i16_seq_cst: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-ZACAS-NEXT: slli a2, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: srli a1, a1, 48 -; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: 
amoxor.w a1, a1, (a2) -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: amoxor.w a0, a1, (a0) +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a0, a2 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_seq_cst: @@ -24789,30 +24789,30 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-ZALRSC-NEXT: andi a2, a0, -4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: slli a1, a1, 16 -; RV32I-ZALRSC-NEXT: li a4, 16 -; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 ; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: srai a1, a1, 16 ; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a5, 16 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 -; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: sub a5, a5, a4 ; RV32I-ZALRSC-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a5, (a2) -; RV32I-ZALRSC-NEXT: and a7, a5, a3 -; RV32I-ZALRSC-NEXT: mv a6, a5 -; RV32I-ZALRSC-NEXT: sll a7, a7, a4 -; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a7, a4, a3 +; RV32I-ZALRSC-NEXT: mv a6, a4 +; RV32I-ZALRSC-NEXT: sll a7, a7, a5 +; RV32I-ZALRSC-NEXT: sra a7, a7, a5 ; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB110_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB110_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: xor a6, a4, a1 ; RV32I-ZALRSC-NEXT: and a6, a6, a3 -; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: xor a6, a4, a6 ; RV32I-ZALRSC-NEXT: .LBB110_3: # in Loop: Header=BB110_1 Depth=1 ; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) ; RV32I-ZALRSC-NEXT: bnez a6, .LBB110_1 ; RV32I-ZALRSC-NEXT: # %bb.4: -; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; 
RV32IA-NOZACAS-LABEL: atomicrmw_max_i16_monotonic: @@ -24820,30 +24820,30 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-NOZACAS-NEXT: slli a1, a1, 16 -; RV32IA-NOZACAS-NEXT: li a4, 16 -; RV32IA-NOZACAS-NEXT: andi a5, a0, 24 +; RV32IA-NOZACAS-NEXT: andi a4, a0, 24 ; RV32IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-NOZACAS-NEXT: srai a1, a1, 16 ; RV32IA-NOZACAS-NEXT: sll a3, a3, a0 +; RV32IA-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-NOZACAS-NEXT: srai a1, a1, 16 +; RV32IA-NOZACAS-NEXT: li a5, 16 ; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-NOZACAS-NEXT: sub a4, a4, a5 +; RV32IA-NOZACAS-NEXT: sub a5, a5, a4 ; RV32IA-NOZACAS-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NOZACAS-NEXT: lr.w a5, (a2) -; RV32IA-NOZACAS-NEXT: and a7, a5, a3 -; RV32IA-NOZACAS-NEXT: mv a6, a5 -; RV32IA-NOZACAS-NEXT: sll a7, a7, a4 -; RV32IA-NOZACAS-NEXT: sra a7, a7, a4 +; RV32IA-NOZACAS-NEXT: lr.w a4, (a2) +; RV32IA-NOZACAS-NEXT: and a7, a4, a3 +; RV32IA-NOZACAS-NEXT: mv a6, a4 +; RV32IA-NOZACAS-NEXT: sll a7, a7, a5 +; RV32IA-NOZACAS-NEXT: sra a7, a7, a5 ; RV32IA-NOZACAS-NEXT: bge a7, a1, .LBB110_3 ; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB110_1 Depth=1 -; RV32IA-NOZACAS-NEXT: xor a6, a5, a1 +; RV32IA-NOZACAS-NEXT: xor a6, a4, a1 ; RV32IA-NOZACAS-NEXT: and a6, a6, a3 -; RV32IA-NOZACAS-NEXT: xor a6, a5, a6 +; RV32IA-NOZACAS-NEXT: xor a6, a4, a6 ; RV32IA-NOZACAS-NEXT: .LBB110_3: # in Loop: Header=BB110_1 Depth=1 ; RV32IA-NOZACAS-NEXT: sc.w a6, a6, (a2) ; RV32IA-NOZACAS-NEXT: bnez a6, .LBB110_1 ; RV32IA-NOZACAS-NEXT: # %bb.4: -; RV32IA-NOZACAS-NEXT: srl a0, a5, a0 +; RV32IA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_max_i16_monotonic: @@ -24893,30 +24893,30 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-ZALRSC-NEXT: andi a2, a0, -4 ; RV64I-ZALRSC-NEXT: slli 
a0, a0, 3 ; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: slli a1, a1, 48 -; RV64I-ZALRSC-NEXT: li a4, 48 -; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 ; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: srai a1, a1, 48 ; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a5, 48 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 -; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: sub a5, a5, a4 ; RV64I-ZALRSC-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a5, (a2) -; RV64I-ZALRSC-NEXT: and a7, a5, a3 -; RV64I-ZALRSC-NEXT: mv a6, a5 -; RV64I-ZALRSC-NEXT: sll a7, a7, a4 -; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a7, a4, a3 +; RV64I-ZALRSC-NEXT: mv a6, a4 +; RV64I-ZALRSC-NEXT: sll a7, a7, a5 +; RV64I-ZALRSC-NEXT: sra a7, a7, a5 ; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB110_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB110_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: xor a6, a4, a1 ; RV64I-ZALRSC-NEXT: and a6, a6, a3 -; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: xor a6, a4, a6 ; RV64I-ZALRSC-NEXT: .LBB110_3: # in Loop: Header=BB110_1 Depth=1 ; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) ; RV64I-ZALRSC-NEXT: bnez a6, .LBB110_1 ; RV64I-ZALRSC-NEXT: # %bb.4: -; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_max_i16_monotonic: @@ -24924,30 +24924,30 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-NOZACAS-NEXT: slli a1, a1, 48 -; RV64IA-NOZACAS-NEXT: li a4, 48 -; RV64IA-NOZACAS-NEXT: andi a5, a0, 24 +; RV64IA-NOZACAS-NEXT: andi a4, a0, 24 ; RV64IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-NOZACAS-NEXT: srai a1, 
a1, 48 ; RV64IA-NOZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-NOZACAS-NEXT: srai a1, a1, 48 +; RV64IA-NOZACAS-NEXT: li a5, 48 ; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-NOZACAS-NEXT: sub a4, a4, a5 +; RV64IA-NOZACAS-NEXT: sub a5, a5, a4 ; RV64IA-NOZACAS-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NOZACAS-NEXT: lr.w a5, (a2) -; RV64IA-NOZACAS-NEXT: and a7, a5, a3 -; RV64IA-NOZACAS-NEXT: mv a6, a5 -; RV64IA-NOZACAS-NEXT: sll a7, a7, a4 -; RV64IA-NOZACAS-NEXT: sra a7, a7, a4 +; RV64IA-NOZACAS-NEXT: lr.w a4, (a2) +; RV64IA-NOZACAS-NEXT: and a7, a4, a3 +; RV64IA-NOZACAS-NEXT: mv a6, a4 +; RV64IA-NOZACAS-NEXT: sll a7, a7, a5 +; RV64IA-NOZACAS-NEXT: sra a7, a7, a5 ; RV64IA-NOZACAS-NEXT: bge a7, a1, .LBB110_3 ; RV64IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB110_1 Depth=1 -; RV64IA-NOZACAS-NEXT: xor a6, a5, a1 +; RV64IA-NOZACAS-NEXT: xor a6, a4, a1 ; RV64IA-NOZACAS-NEXT: and a6, a6, a3 -; RV64IA-NOZACAS-NEXT: xor a6, a5, a6 +; RV64IA-NOZACAS-NEXT: xor a6, a4, a6 ; RV64IA-NOZACAS-NEXT: .LBB110_3: # in Loop: Header=BB110_1 Depth=1 ; RV64IA-NOZACAS-NEXT: sc.w a6, a6, (a2) ; RV64IA-NOZACAS-NEXT: bnez a6, .LBB110_1 ; RV64IA-NOZACAS-NEXT: # %bb.4: -; RV64IA-NOZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_max_i16_monotonic: @@ -24955,30 +24955,30 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-ZACAS-NEXT: lui a3, 16 -; RV32IA-ZACAS-NEXT: slli a1, a1, 16 -; RV32IA-ZACAS-NEXT: li a4, 16 -; RV32IA-ZACAS-NEXT: andi a5, a0, 24 +; RV32IA-ZACAS-NEXT: andi a4, a0, 24 ; RV32IA-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-ZACAS-NEXT: srai a1, a1, 16 ; RV32IA-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-ZACAS-NEXT: srai a1, a1, 16 +; RV32IA-ZACAS-NEXT: li a5, 16 ; RV32IA-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-ZACAS-NEXT: sub 
a4, a4, a5 +; RV32IA-ZACAS-NEXT: sub a5, a5, a4 ; RV32IA-ZACAS-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-ZACAS-NEXT: lr.w a5, (a2) -; RV32IA-ZACAS-NEXT: and a7, a5, a3 -; RV32IA-ZACAS-NEXT: mv a6, a5 -; RV32IA-ZACAS-NEXT: sll a7, a7, a4 -; RV32IA-ZACAS-NEXT: sra a7, a7, a4 +; RV32IA-ZACAS-NEXT: lr.w a4, (a2) +; RV32IA-ZACAS-NEXT: and a7, a4, a3 +; RV32IA-ZACAS-NEXT: mv a6, a4 +; RV32IA-ZACAS-NEXT: sll a7, a7, a5 +; RV32IA-ZACAS-NEXT: sra a7, a7, a5 ; RV32IA-ZACAS-NEXT: bge a7, a1, .LBB110_3 ; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB110_1 Depth=1 -; RV32IA-ZACAS-NEXT: xor a6, a5, a1 +; RV32IA-ZACAS-NEXT: xor a6, a4, a1 ; RV32IA-ZACAS-NEXT: and a6, a6, a3 -; RV32IA-ZACAS-NEXT: xor a6, a5, a6 +; RV32IA-ZACAS-NEXT: xor a6, a4, a6 ; RV32IA-ZACAS-NEXT: .LBB110_3: # in Loop: Header=BB110_1 Depth=1 ; RV32IA-ZACAS-NEXT: sc.w a6, a6, (a2) ; RV32IA-ZACAS-NEXT: bnez a6, .LBB110_1 ; RV32IA-ZACAS-NEXT: # %bb.4: -; RV32IA-ZACAS-NEXT: srl a0, a5, a0 +; RV32IA-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_max_i16_monotonic: @@ -24986,30 +24986,30 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-ZACAS-NEXT: lui a3, 16 -; RV64IA-ZACAS-NEXT: slli a1, a1, 48 -; RV64IA-ZACAS-NEXT: li a4, 48 -; RV64IA-ZACAS-NEXT: andi a5, a0, 24 +; RV64IA-ZACAS-NEXT: andi a4, a0, 24 ; RV64IA-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-ZACAS-NEXT: srai a1, a1, 48 ; RV64IA-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-ZACAS-NEXT: srai a1, a1, 48 +; RV64IA-ZACAS-NEXT: li a5, 48 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-ZACAS-NEXT: sub a4, a4, a5 +; RV64IA-ZACAS-NEXT: sub a5, a5, a4 ; RV64IA-ZACAS-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w a5, (a2) -; RV64IA-ZACAS-NEXT: and a7, a5, a3 -; RV64IA-ZACAS-NEXT: mv a6, a5 -; RV64IA-ZACAS-NEXT: sll a7, a7, a4 -; RV64IA-ZACAS-NEXT: 
sra a7, a7, a4 +; RV64IA-ZACAS-NEXT: lr.w a4, (a2) +; RV64IA-ZACAS-NEXT: and a7, a4, a3 +; RV64IA-ZACAS-NEXT: mv a6, a4 +; RV64IA-ZACAS-NEXT: sll a7, a7, a5 +; RV64IA-ZACAS-NEXT: sra a7, a7, a5 ; RV64IA-ZACAS-NEXT: bge a7, a1, .LBB110_3 ; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB110_1 Depth=1 -; RV64IA-ZACAS-NEXT: xor a6, a5, a1 +; RV64IA-ZACAS-NEXT: xor a6, a4, a1 ; RV64IA-ZACAS-NEXT: and a6, a6, a3 -; RV64IA-ZACAS-NEXT: xor a6, a5, a6 +; RV64IA-ZACAS-NEXT: xor a6, a4, a6 ; RV64IA-ZACAS-NEXT: .LBB110_3: # in Loop: Header=BB110_1 Depth=1 ; RV64IA-ZACAS-NEXT: sc.w a6, a6, (a2) ; RV64IA-ZACAS-NEXT: bnez a6, .LBB110_1 ; RV64IA-ZACAS-NEXT: # %bb.4: -; RV64IA-ZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_monotonic: @@ -25083,30 +25083,30 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-ZALRSC-NEXT: andi a2, a0, -4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: slli a1, a1, 16 -; RV32I-ZALRSC-NEXT: li a4, 16 -; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 ; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: srai a1, a1, 16 ; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a5, 16 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 -; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: sub a5, a5, a4 ; RV32I-ZALRSC-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) -; RV32I-ZALRSC-NEXT: and a7, a5, a3 -; RV32I-ZALRSC-NEXT: mv a6, a5 -; RV32I-ZALRSC-NEXT: sll a7, a7, a4 -; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: and a7, a4, a3 +; RV32I-ZALRSC-NEXT: mv a6, a4 +; RV32I-ZALRSC-NEXT: sll a7, a7, a5 +; RV32I-ZALRSC-NEXT: sra a7, a7, a5 ; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB111_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in 
Loop: Header=BB111_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: xor a6, a4, a1 ; RV32I-ZALRSC-NEXT: and a6, a6, a3 -; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: xor a6, a4, a6 ; RV32I-ZALRSC-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1 ; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) ; RV32I-ZALRSC-NEXT: bnez a6, .LBB111_1 ; RV32I-ZALRSC-NEXT: # %bb.4: -; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acquire: @@ -25114,30 +25114,30 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16 -; RV32IA-WMO-NOZACAS-NEXT: li a4, 16 -; RV32IA-WMO-NOZACAS-NEXT: andi a5, a0, 24 +; RV32IA-WMO-NOZACAS-NEXT: andi a4, a0, 24 ; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 16 ; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: li a5, 16 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NOZACAS-NEXT: sub a4, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: sub a5, a5, a4 ; RV32IA-WMO-NOZACAS-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a5, (a2) -; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3 -; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5 -; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4 -; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4 +; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2) +; RV32IA-WMO-NOZACAS-NEXT: and a7, a4, a3 +; RV32IA-WMO-NOZACAS-NEXT: mv a6, a4 +; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a5 +; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a5 ; RV32IA-WMO-NOZACAS-NEXT: bge a7, a1, .LBB111_3 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1 +; RV32IA-WMO-NOZACAS-NEXT: 
xor a6, a4, a1 ; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3 -; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6 +; RV32IA-WMO-NOZACAS-NEXT: xor a6, a4, a6 ; RV32IA-WMO-NOZACAS-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1 ; RV32IA-WMO-NOZACAS-NEXT: sc.w a6, a6, (a2) ; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB111_1 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.4: -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0 +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_max_i16_acquire: @@ -25145,30 +25145,30 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16 -; RV32IA-TSO-NOZACAS-NEXT: li a4, 16 -; RV32IA-TSO-NOZACAS-NEXT: andi a5, a0, 24 +; RV32IA-TSO-NOZACAS-NEXT: andi a4, a0, 24 ; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 16 ; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: li a5, 16 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: sub a4, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: sub a5, a5, a4 ; RV32IA-TSO-NOZACAS-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2) -; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3 -; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5 -; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4 -; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4 +; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2) +; RV32IA-TSO-NOZACAS-NEXT: and a7, a4, a3 +; RV32IA-TSO-NOZACAS-NEXT: mv a6, a4 +; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a5 +; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a5 ; RV32IA-TSO-NOZACAS-NEXT: bge a7, a1, .LBB111_3 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1 +; RV32IA-TSO-NOZACAS-NEXT: xor a6, a4, a1 ; 
RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3 -; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6 +; RV32IA-TSO-NOZACAS-NEXT: xor a6, a4, a6 ; RV32IA-TSO-NOZACAS-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1 ; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2) ; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB111_1 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.4: -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0 +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_max_i16_acquire: @@ -25218,30 +25218,30 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-ZALRSC-NEXT: andi a2, a0, -4 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: slli a1, a1, 48 -; RV64I-ZALRSC-NEXT: li a4, 48 -; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 ; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: srai a1, a1, 48 ; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a5, 48 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 -; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: sub a5, a5, a4 ; RV64I-ZALRSC-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) -; RV64I-ZALRSC-NEXT: and a7, a5, a3 -; RV64I-ZALRSC-NEXT: mv a6, a5 -; RV64I-ZALRSC-NEXT: sll a7, a7, a4 -; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: and a7, a4, a3 +; RV64I-ZALRSC-NEXT: mv a6, a4 +; RV64I-ZALRSC-NEXT: sll a7, a7, a5 +; RV64I-ZALRSC-NEXT: sra a7, a7, a5 ; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB111_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: xor a6, a4, a1 ; RV64I-ZALRSC-NEXT: and a6, a6, a3 -; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: xor a6, a4, a6 ; RV64I-ZALRSC-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1 ; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) ; 
RV64I-ZALRSC-NEXT: bnez a6, .LBB111_1 ; RV64I-ZALRSC-NEXT: # %bb.4: -; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acquire: @@ -25249,30 +25249,30 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 48 -; RV64IA-WMO-NOZACAS-NEXT: li a4, 48 -; RV64IA-WMO-NOZACAS-NEXT: andi a5, a0, 24 +; RV64IA-WMO-NOZACAS-NEXT: andi a4, a0, 24 ; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: srai a1, a1, 48 ; RV64IA-WMO-NOZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-NOZACAS-NEXT: srai a1, a1, 48 +; RV64IA-WMO-NOZACAS-NEXT: li a5, 48 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: sub a4, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: sub a5, a5, a4 ; RV64IA-WMO-NOZACAS-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a5, (a2) -; RV64IA-WMO-NOZACAS-NEXT: and a7, a5, a3 -; RV64IA-WMO-NOZACAS-NEXT: mv a6, a5 -; RV64IA-WMO-NOZACAS-NEXT: sll a7, a7, a4 -; RV64IA-WMO-NOZACAS-NEXT: sra a7, a7, a4 +; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2) +; RV64IA-WMO-NOZACAS-NEXT: and a7, a4, a3 +; RV64IA-WMO-NOZACAS-NEXT: mv a6, a4 +; RV64IA-WMO-NOZACAS-NEXT: sll a7, a7, a5 +; RV64IA-WMO-NOZACAS-NEXT: sra a7, a7, a5 ; RV64IA-WMO-NOZACAS-NEXT: bge a7, a1, .LBB111_3 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: xor a6, a5, a1 +; RV64IA-WMO-NOZACAS-NEXT: xor a6, a4, a1 ; RV64IA-WMO-NOZACAS-NEXT: and a6, a6, a3 -; RV64IA-WMO-NOZACAS-NEXT: xor a6, a5, a6 +; RV64IA-WMO-NOZACAS-NEXT: xor a6, a4, a6 ; RV64IA-WMO-NOZACAS-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1 ; RV64IA-WMO-NOZACAS-NEXT: sc.w a6, a6, (a2) ; RV64IA-WMO-NOZACAS-NEXT: bnez a6, .LBB111_1 ; 
RV64IA-WMO-NOZACAS-NEXT: # %bb.4: -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_max_i16_acquire: @@ -25280,30 +25280,30 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 48 -; RV64IA-TSO-NOZACAS-NEXT: li a4, 48 -; RV64IA-TSO-NOZACAS-NEXT: andi a5, a0, 24 +; RV64IA-TSO-NOZACAS-NEXT: andi a4, a0, 24 ; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: srai a1, a1, 48 ; RV64IA-TSO-NOZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-NOZACAS-NEXT: srai a1, a1, 48 +; RV64IA-TSO-NOZACAS-NEXT: li a5, 48 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: sub a4, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: sub a5, a5, a4 ; RV64IA-TSO-NOZACAS-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: lr.w a5, (a2) -; RV64IA-TSO-NOZACAS-NEXT: and a7, a5, a3 -; RV64IA-TSO-NOZACAS-NEXT: mv a6, a5 -; RV64IA-TSO-NOZACAS-NEXT: sll a7, a7, a4 -; RV64IA-TSO-NOZACAS-NEXT: sra a7, a7, a4 +; RV64IA-TSO-NOZACAS-NEXT: lr.w a4, (a2) +; RV64IA-TSO-NOZACAS-NEXT: and a7, a4, a3 +; RV64IA-TSO-NOZACAS-NEXT: mv a6, a4 +; RV64IA-TSO-NOZACAS-NEXT: sll a7, a7, a5 +; RV64IA-TSO-NOZACAS-NEXT: sra a7, a7, a5 ; RV64IA-TSO-NOZACAS-NEXT: bge a7, a1, .LBB111_3 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: xor a6, a5, a1 +; RV64IA-TSO-NOZACAS-NEXT: xor a6, a4, a1 ; RV64IA-TSO-NOZACAS-NEXT: and a6, a6, a3 -; RV64IA-TSO-NOZACAS-NEXT: xor a6, a5, a6 +; RV64IA-TSO-NOZACAS-NEXT: xor a6, a4, a6 ; RV64IA-TSO-NOZACAS-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1 ; RV64IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2) ; RV64IA-TSO-NOZACAS-NEXT: bnez a6, .LBB111_1 ; RV64IA-TSO-NOZACAS-NEXT: # 
%bb.4: -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_max_i16_acquire: @@ -25311,30 +25311,30 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16 -; RV32IA-WMO-ZACAS-NEXT: li a4, 16 -; RV32IA-WMO-ZACAS-NEXT: andi a5, a0, 24 +; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 24 ; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 16 ; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 16 +; RV32IA-WMO-ZACAS-NEXT: li a5, 16 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: sub a4, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: sub a5, a5, a4 ; RV32IA-WMO-ZACAS-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a5, (a2) -; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3 -; RV32IA-WMO-ZACAS-NEXT: mv a6, a5 -; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4 -; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2) +; RV32IA-WMO-ZACAS-NEXT: and a7, a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a5 +; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a5 ; RV32IA-WMO-ZACAS-NEXT: bge a7, a1, .LBB111_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1 +; RV32IA-WMO-ZACAS-NEXT: xor a6, a4, a1 ; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3 -; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6 +; RV32IA-WMO-ZACAS-NEXT: xor a6, a4, a6 ; RV32IA-WMO-ZACAS-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1 ; RV32IA-WMO-ZACAS-NEXT: sc.w a6, a6, (a2) ; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB111_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.4: -; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0 +; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0 ; 
RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_max_i16_acquire: @@ -25342,30 +25342,30 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16 -; RV32IA-TSO-ZACAS-NEXT: li a4, 16 -; RV32IA-TSO-ZACAS-NEXT: andi a5, a0, 24 +; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 24 ; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 16 ; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 16 +; RV32IA-TSO-ZACAS-NEXT: li a5, 16 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: sub a4, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: sub a5, a5, a4 ; RV32IA-TSO-ZACAS-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2) -; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3 -; RV32IA-TSO-ZACAS-NEXT: mv a6, a5 -; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4 -; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4 +; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2) +; RV32IA-TSO-ZACAS-NEXT: and a7, a4, a3 +; RV32IA-TSO-ZACAS-NEXT: mv a6, a4 +; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a5 +; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a5 ; RV32IA-TSO-ZACAS-NEXT: bge a7, a1, .LBB111_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1 +; RV32IA-TSO-ZACAS-NEXT: xor a6, a4, a1 ; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3 -; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6 +; RV32IA-TSO-ZACAS-NEXT: xor a6, a4, a6 ; RV32IA-TSO-ZACAS-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1 ; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2) ; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB111_1 ; RV32IA-TSO-ZACAS-NEXT: # %bb.4: -; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0 +; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_max_i16_acquire: @@ -25373,30 +25373,30 @@ define i16 
@atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 48 -; RV64IA-WMO-ZACAS-NEXT: li a4, 48 -; RV64IA-WMO-ZACAS-NEXT: andi a5, a0, 24 +; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, 24 ; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: srai a1, a1, 48 ; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-ZACAS-NEXT: srai a1, a1, 48 +; RV64IA-WMO-ZACAS-NEXT: li a5, 48 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: sub a4, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: sub a5, a5, a4 ; RV64IA-WMO-ZACAS-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a5, (a2) -; RV64IA-WMO-ZACAS-NEXT: and a7, a5, a3 -; RV64IA-WMO-ZACAS-NEXT: mv a6, a5 -; RV64IA-WMO-ZACAS-NEXT: sll a7, a7, a4 -; RV64IA-WMO-ZACAS-NEXT: sra a7, a7, a4 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2) +; RV64IA-WMO-ZACAS-NEXT: and a7, a4, a3 +; RV64IA-WMO-ZACAS-NEXT: mv a6, a4 +; RV64IA-WMO-ZACAS-NEXT: sll a7, a7, a5 +; RV64IA-WMO-ZACAS-NEXT: sra a7, a7, a5 ; RV64IA-WMO-ZACAS-NEXT: bge a7, a1, .LBB111_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a6, a5, a1 +; RV64IA-WMO-ZACAS-NEXT: xor a6, a4, a1 ; RV64IA-WMO-ZACAS-NEXT: and a6, a6, a3 -; RV64IA-WMO-ZACAS-NEXT: xor a6, a5, a6 +; RV64IA-WMO-ZACAS-NEXT: xor a6, a4, a6 ; RV64IA-WMO-ZACAS-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1 ; RV64IA-WMO-ZACAS-NEXT: sc.w a6, a6, (a2) ; RV64IA-WMO-ZACAS-NEXT: bnez a6, .LBB111_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.4: -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_max_i16_acquire: @@ -25404,30 +25404,30 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 ; 
RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 48 -; RV64IA-TSO-ZACAS-NEXT: li a4, 48 -; RV64IA-TSO-ZACAS-NEXT: andi a5, a0, 24 +; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, 24 ; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: srai a1, a1, 48 ; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-ZACAS-NEXT: srai a1, a1, 48 +; RV64IA-TSO-ZACAS-NEXT: li a5, 48 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: sub a4, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: sub a5, a5, a4 ; RV64IA-TSO-ZACAS-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a5, (a2) -; RV64IA-TSO-ZACAS-NEXT: and a7, a5, a3 -; RV64IA-TSO-ZACAS-NEXT: mv a6, a5 -; RV64IA-TSO-ZACAS-NEXT: sll a7, a7, a4 -; RV64IA-TSO-ZACAS-NEXT: sra a7, a7, a4 +; RV64IA-TSO-ZACAS-NEXT: lr.w a4, (a2) +; RV64IA-TSO-ZACAS-NEXT: and a7, a4, a3 +; RV64IA-TSO-ZACAS-NEXT: mv a6, a4 +; RV64IA-TSO-ZACAS-NEXT: sll a7, a7, a5 +; RV64IA-TSO-ZACAS-NEXT: sra a7, a7, a5 ; RV64IA-TSO-ZACAS-NEXT: bge a7, a1, .LBB111_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a6, a5, a1 +; RV64IA-TSO-ZACAS-NEXT: xor a6, a4, a1 ; RV64IA-TSO-ZACAS-NEXT: and a6, a6, a3 -; RV64IA-TSO-ZACAS-NEXT: xor a6, a5, a6 +; RV64IA-TSO-ZACAS-NEXT: xor a6, a4, a6 ; RV64IA-TSO-ZACAS-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1 ; RV64IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2) ; RV64IA-TSO-ZACAS-NEXT: bnez a6, .LBB111_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.4: -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_acquire: @@ -25501,30 +25501,30 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-ZALRSC-NEXT: andi a2, a0, -4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: slli a1, a1, 16 -; 
RV32I-ZALRSC-NEXT: li a4, 16 -; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 ; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: srai a1, a1, 16 ; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a5, 16 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 -; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: sub a5, a5, a4 ; RV32I-ZALRSC-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a5, (a2) -; RV32I-ZALRSC-NEXT: and a7, a5, a3 -; RV32I-ZALRSC-NEXT: mv a6, a5 -; RV32I-ZALRSC-NEXT: sll a7, a7, a4 -; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a7, a4, a3 +; RV32I-ZALRSC-NEXT: mv a6, a4 +; RV32I-ZALRSC-NEXT: sll a7, a7, a5 +; RV32I-ZALRSC-NEXT: sra a7, a7, a5 ; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB112_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: xor a6, a4, a1 ; RV32I-ZALRSC-NEXT: and a6, a6, a3 -; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: xor a6, a4, a6 ; RV32I-ZALRSC-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1 ; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) ; RV32I-ZALRSC-NEXT: bnez a6, .LBB112_1 ; RV32I-ZALRSC-NEXT: # %bb.4: -; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_release: @@ -25532,30 +25532,30 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16 -; RV32IA-WMO-NOZACAS-NEXT: li a4, 16 -; RV32IA-WMO-NOZACAS-NEXT: andi a5, a0, 24 +; RV32IA-WMO-NOZACAS-NEXT: andi a4, a0, 24 ; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 16 ; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0 
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: li a5, 16 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NOZACAS-NEXT: sub a4, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: sub a5, a5, a4 ; RV32IA-WMO-NOZACAS-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: lr.w a5, (a2) -; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3 -; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5 -; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4 -; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4 +; RV32IA-WMO-NOZACAS-NEXT: lr.w a4, (a2) +; RV32IA-WMO-NOZACAS-NEXT: and a7, a4, a3 +; RV32IA-WMO-NOZACAS-NEXT: mv a6, a4 +; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a5 +; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a5 ; RV32IA-WMO-NOZACAS-NEXT: bge a7, a1, .LBB112_3 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1 +; RV32IA-WMO-NOZACAS-NEXT: xor a6, a4, a1 ; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3 -; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6 +; RV32IA-WMO-NOZACAS-NEXT: xor a6, a4, a6 ; RV32IA-WMO-NOZACAS-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1 ; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a6, a6, (a2) ; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB112_1 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.4: -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0 +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_max_i16_release: @@ -25563,30 +25563,30 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { ; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16 -; RV32IA-TSO-NOZACAS-NEXT: li a4, 16 -; RV32IA-TSO-NOZACAS-NEXT: andi a5, a0, 24 +; RV32IA-TSO-NOZACAS-NEXT: andi a4, a0, 24 ; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 16 ; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NOZACAS-NEXT: 
slli a1, a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: li a5, 16 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: sub a4, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: sub a5, a5, a4 ; RV32IA-TSO-NOZACAS-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2) -; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3 -; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5 -; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4 -; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4 +; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2) +; RV32IA-TSO-NOZACAS-NEXT: and a7, a4, a3 +; RV32IA-TSO-NOZACAS-NEXT: mv a6, a4 +; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a5 +; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a5 ; RV32IA-TSO-NOZACAS-NEXT: bge a7, a1, .LBB112_3 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1 +; RV32IA-TSO-NOZACAS-NEXT: xor a6, a4, a1 ; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3 -; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6 +; RV32IA-TSO-NOZACAS-NEXT: xor a6, a4, a6 ; RV32IA-TSO-NOZACAS-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1 ; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2) ; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB112_1 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.4: -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0 +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_max_i16_release: @@ -25636,30 +25636,30 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-ZALRSC-NEXT: andi a2, a0, -4 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: slli a1, a1, 48 -; RV64I-ZALRSC-NEXT: li a4, 48 -; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 ; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: srai a1, a1, 48 ; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a5, 48 ; RV64I-ZALRSC-NEXT: sllw 
a1, a1, a0 -; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: sub a5, a5, a4 ; RV64I-ZALRSC-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a5, (a2) -; RV64I-ZALRSC-NEXT: and a7, a5, a3 -; RV64I-ZALRSC-NEXT: mv a6, a5 -; RV64I-ZALRSC-NEXT: sll a7, a7, a4 -; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a7, a4, a3 +; RV64I-ZALRSC-NEXT: mv a6, a4 +; RV64I-ZALRSC-NEXT: sll a7, a7, a5 +; RV64I-ZALRSC-NEXT: sra a7, a7, a5 ; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB112_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: xor a6, a4, a1 ; RV64I-ZALRSC-NEXT: and a6, a6, a3 -; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: xor a6, a4, a6 ; RV64I-ZALRSC-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1 ; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) ; RV64I-ZALRSC-NEXT: bnez a6, .LBB112_1 ; RV64I-ZALRSC-NEXT: # %bb.4: -; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_release: @@ -25667,30 +25667,30 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 48 -; RV64IA-WMO-NOZACAS-NEXT: li a4, 48 -; RV64IA-WMO-NOZACAS-NEXT: andi a5, a0, 24 +; RV64IA-WMO-NOZACAS-NEXT: andi a4, a0, 24 ; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: srai a1, a1, 48 ; RV64IA-WMO-NOZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-NOZACAS-NEXT: srai a1, a1, 48 +; RV64IA-WMO-NOZACAS-NEXT: li a5, 48 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: sub a4, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: sub a5, a5, a4 ; RV64IA-WMO-NOZACAS-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 -; 
RV64IA-WMO-NOZACAS-NEXT: lr.w a5, (a2) -; RV64IA-WMO-NOZACAS-NEXT: and a7, a5, a3 -; RV64IA-WMO-NOZACAS-NEXT: mv a6, a5 -; RV64IA-WMO-NOZACAS-NEXT: sll a7, a7, a4 -; RV64IA-WMO-NOZACAS-NEXT: sra a7, a7, a4 +; RV64IA-WMO-NOZACAS-NEXT: lr.w a4, (a2) +; RV64IA-WMO-NOZACAS-NEXT: and a7, a4, a3 +; RV64IA-WMO-NOZACAS-NEXT: mv a6, a4 +; RV64IA-WMO-NOZACAS-NEXT: sll a7, a7, a5 +; RV64IA-WMO-NOZACAS-NEXT: sra a7, a7, a5 ; RV64IA-WMO-NOZACAS-NEXT: bge a7, a1, .LBB112_3 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: xor a6, a5, a1 +; RV64IA-WMO-NOZACAS-NEXT: xor a6, a4, a1 ; RV64IA-WMO-NOZACAS-NEXT: and a6, a6, a3 -; RV64IA-WMO-NOZACAS-NEXT: xor a6, a5, a6 +; RV64IA-WMO-NOZACAS-NEXT: xor a6, a4, a6 ; RV64IA-WMO-NOZACAS-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1 ; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a6, a6, (a2) ; RV64IA-WMO-NOZACAS-NEXT: bnez a6, .LBB112_1 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.4: -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_max_i16_release: @@ -25698,30 +25698,30 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 48 -; RV64IA-TSO-NOZACAS-NEXT: li a4, 48 -; RV64IA-TSO-NOZACAS-NEXT: andi a5, a0, 24 +; RV64IA-TSO-NOZACAS-NEXT: andi a4, a0, 24 ; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: srai a1, a1, 48 ; RV64IA-TSO-NOZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-NOZACAS-NEXT: srai a1, a1, 48 +; RV64IA-TSO-NOZACAS-NEXT: li a5, 48 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: sub a4, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: sub a5, a5, a4 ; RV64IA-TSO-NOZACAS-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: 
lr.w a5, (a2) -; RV64IA-TSO-NOZACAS-NEXT: and a7, a5, a3 -; RV64IA-TSO-NOZACAS-NEXT: mv a6, a5 -; RV64IA-TSO-NOZACAS-NEXT: sll a7, a7, a4 -; RV64IA-TSO-NOZACAS-NEXT: sra a7, a7, a4 +; RV64IA-TSO-NOZACAS-NEXT: lr.w a4, (a2) +; RV64IA-TSO-NOZACAS-NEXT: and a7, a4, a3 +; RV64IA-TSO-NOZACAS-NEXT: mv a6, a4 +; RV64IA-TSO-NOZACAS-NEXT: sll a7, a7, a5 +; RV64IA-TSO-NOZACAS-NEXT: sra a7, a7, a5 ; RV64IA-TSO-NOZACAS-NEXT: bge a7, a1, .LBB112_3 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: xor a6, a5, a1 +; RV64IA-TSO-NOZACAS-NEXT: xor a6, a4, a1 ; RV64IA-TSO-NOZACAS-NEXT: and a6, a6, a3 -; RV64IA-TSO-NOZACAS-NEXT: xor a6, a5, a6 +; RV64IA-TSO-NOZACAS-NEXT: xor a6, a4, a6 ; RV64IA-TSO-NOZACAS-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1 ; RV64IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2) ; RV64IA-TSO-NOZACAS-NEXT: bnez a6, .LBB112_1 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.4: -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_max_i16_release: @@ -25729,30 +25729,30 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { ; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16 -; RV32IA-WMO-ZACAS-NEXT: li a4, 16 -; RV32IA-WMO-ZACAS-NEXT: andi a5, a0, 24 +; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 24 ; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 16 ; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 16 +; RV32IA-WMO-ZACAS-NEXT: li a5, 16 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: sub a4, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: sub a5, a5, a4 ; RV32IA-WMO-ZACAS-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w a5, (a2) -; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3 -; 
RV32IA-WMO-ZACAS-NEXT: mv a6, a5 -; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4 -; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4 +; RV32IA-WMO-ZACAS-NEXT: lr.w a4, (a2) +; RV32IA-WMO-ZACAS-NEXT: and a7, a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a5 +; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a5 ; RV32IA-WMO-ZACAS-NEXT: bge a7, a1, .LBB112_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1 +; RV32IA-WMO-ZACAS-NEXT: xor a6, a4, a1 ; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3 -; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6 +; RV32IA-WMO-ZACAS-NEXT: xor a6, a4, a6 ; RV32IA-WMO-ZACAS-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1 ; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a6, a6, (a2) ; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB112_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.4: -; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0 +; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_max_i16_release: @@ -25760,30 +25760,30 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { ; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16 -; RV32IA-TSO-ZACAS-NEXT: li a4, 16 -; RV32IA-TSO-ZACAS-NEXT: andi a5, a0, 24 +; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 24 ; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 16 ; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 16 +; RV32IA-TSO-ZACAS-NEXT: li a5, 16 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: sub a4, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: sub a5, a5, a4 ; RV32IA-TSO-ZACAS-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2) -; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3 -; RV32IA-TSO-ZACAS-NEXT: mv a6, a5 -; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4 -; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4 
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2) +; RV32IA-TSO-ZACAS-NEXT: and a7, a4, a3 +; RV32IA-TSO-ZACAS-NEXT: mv a6, a4 +; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a5 +; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a5 ; RV32IA-TSO-ZACAS-NEXT: bge a7, a1, .LBB112_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1 +; RV32IA-TSO-ZACAS-NEXT: xor a6, a4, a1 ; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3 -; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6 +; RV32IA-TSO-ZACAS-NEXT: xor a6, a4, a6 ; RV32IA-TSO-ZACAS-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1 ; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2) ; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB112_1 ; RV32IA-TSO-ZACAS-NEXT: # %bb.4: -; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0 +; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_max_i16_release: @@ -25791,30 +25791,30 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 48 -; RV64IA-WMO-ZACAS-NEXT: li a4, 48 -; RV64IA-WMO-ZACAS-NEXT: andi a5, a0, 24 +; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, 24 ; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: srai a1, a1, 48 ; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-ZACAS-NEXT: srai a1, a1, 48 +; RV64IA-WMO-ZACAS-NEXT: li a5, 48 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: sub a4, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: sub a5, a5, a4 ; RV64IA-WMO-ZACAS-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w a5, (a2) -; RV64IA-WMO-ZACAS-NEXT: and a7, a5, a3 -; RV64IA-WMO-ZACAS-NEXT: mv a6, a5 -; RV64IA-WMO-ZACAS-NEXT: sll a7, a7, a4 -; RV64IA-WMO-ZACAS-NEXT: sra a7, a7, a4 +; RV64IA-WMO-ZACAS-NEXT: lr.w a4, (a2) +; RV64IA-WMO-ZACAS-NEXT: and a7, a4, a3 +; RV64IA-WMO-ZACAS-NEXT: mv a6, a4 
+; RV64IA-WMO-ZACAS-NEXT: sll a7, a7, a5 +; RV64IA-WMO-ZACAS-NEXT: sra a7, a7, a5 ; RV64IA-WMO-ZACAS-NEXT: bge a7, a1, .LBB112_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a6, a5, a1 +; RV64IA-WMO-ZACAS-NEXT: xor a6, a4, a1 ; RV64IA-WMO-ZACAS-NEXT: and a6, a6, a3 -; RV64IA-WMO-ZACAS-NEXT: xor a6, a5, a6 +; RV64IA-WMO-ZACAS-NEXT: xor a6, a4, a6 ; RV64IA-WMO-ZACAS-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1 ; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a6, a6, (a2) ; RV64IA-WMO-ZACAS-NEXT: bnez a6, .LBB112_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.4: -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_max_i16_release: @@ -25822,30 +25822,30 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 48 -; RV64IA-TSO-ZACAS-NEXT: li a4, 48 -; RV64IA-TSO-ZACAS-NEXT: andi a5, a0, 24 +; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, 24 ; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: srai a1, a1, 48 ; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-ZACAS-NEXT: srai a1, a1, 48 +; RV64IA-TSO-ZACAS-NEXT: li a5, 48 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: sub a4, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: sub a5, a5, a4 ; RV64IA-TSO-ZACAS-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a5, (a2) -; RV64IA-TSO-ZACAS-NEXT: and a7, a5, a3 -; RV64IA-TSO-ZACAS-NEXT: mv a6, a5 -; RV64IA-TSO-ZACAS-NEXT: sll a7, a7, a4 -; RV64IA-TSO-ZACAS-NEXT: sra a7, a7, a4 +; RV64IA-TSO-ZACAS-NEXT: lr.w a4, (a2) +; RV64IA-TSO-ZACAS-NEXT: and a7, a4, a3 +; RV64IA-TSO-ZACAS-NEXT: mv a6, a4 +; RV64IA-TSO-ZACAS-NEXT: sll a7, a7, a5 +; RV64IA-TSO-ZACAS-NEXT: sra a7, a7, a5 ; RV64IA-TSO-ZACAS-NEXT: bge 
a7, a1, .LBB112_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a6, a5, a1 +; RV64IA-TSO-ZACAS-NEXT: xor a6, a4, a1 ; RV64IA-TSO-ZACAS-NEXT: and a6, a6, a3 -; RV64IA-TSO-ZACAS-NEXT: xor a6, a5, a6 +; RV64IA-TSO-ZACAS-NEXT: xor a6, a4, a6 ; RV64IA-TSO-ZACAS-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1 ; RV64IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2) ; RV64IA-TSO-ZACAS-NEXT: bnez a6, .LBB112_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.4: -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_release: @@ -25919,30 +25919,30 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-ZALRSC-NEXT: andi a2, a0, -4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: slli a1, a1, 16 -; RV32I-ZALRSC-NEXT: li a4, 16 -; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 ; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: srai a1, a1, 16 ; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a5, 16 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 -; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: sub a5, a5, a4 ; RV32I-ZALRSC-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) -; RV32I-ZALRSC-NEXT: and a7, a5, a3 -; RV32I-ZALRSC-NEXT: mv a6, a5 -; RV32I-ZALRSC-NEXT: sll a7, a7, a4 -; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: and a7, a4, a3 +; RV32I-ZALRSC-NEXT: mv a6, a4 +; RV32I-ZALRSC-NEXT: sll a7, a7, a5 +; RV32I-ZALRSC-NEXT: sra a7, a7, a5 ; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB113_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: xor a6, a4, a1 ; RV32I-ZALRSC-NEXT: and a6, a6, a3 -; RV32I-ZALRSC-NEXT: xor a6, 
a5, a6 +; RV32I-ZALRSC-NEXT: xor a6, a4, a6 ; RV32I-ZALRSC-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1 ; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) ; RV32I-ZALRSC-NEXT: bnez a6, .LBB113_1 ; RV32I-ZALRSC-NEXT: # %bb.4: -; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acq_rel: @@ -25950,30 +25950,30 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16 -; RV32IA-WMO-NOZACAS-NEXT: li a4, 16 -; RV32IA-WMO-NOZACAS-NEXT: andi a5, a0, 24 +; RV32IA-WMO-NOZACAS-NEXT: andi a4, a0, 24 ; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 16 ; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: li a5, 16 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NOZACAS-NEXT: sub a4, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: sub a5, a5, a4 ; RV32IA-WMO-NOZACAS-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a5, (a2) -; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3 -; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5 -; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4 -; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4 +; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2) +; RV32IA-WMO-NOZACAS-NEXT: and a7, a4, a3 +; RV32IA-WMO-NOZACAS-NEXT: mv a6, a4 +; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a5 +; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a5 ; RV32IA-WMO-NOZACAS-NEXT: bge a7, a1, .LBB113_3 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1 +; RV32IA-WMO-NOZACAS-NEXT: xor a6, a4, a1 ; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3 -; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6 +; RV32IA-WMO-NOZACAS-NEXT: xor a6, a4, a6 ; 
RV32IA-WMO-NOZACAS-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1 ; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a6, a6, (a2) ; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB113_1 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.4: -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0 +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_max_i16_acq_rel: @@ -25981,30 +25981,30 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16 -; RV32IA-TSO-NOZACAS-NEXT: li a4, 16 -; RV32IA-TSO-NOZACAS-NEXT: andi a5, a0, 24 +; RV32IA-TSO-NOZACAS-NEXT: andi a4, a0, 24 ; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 16 ; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: li a5, 16 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: sub a4, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: sub a5, a5, a4 ; RV32IA-TSO-NOZACAS-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2) -; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3 -; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5 -; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4 -; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4 +; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2) +; RV32IA-TSO-NOZACAS-NEXT: and a7, a4, a3 +; RV32IA-TSO-NOZACAS-NEXT: mv a6, a4 +; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a5 +; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a5 ; RV32IA-TSO-NOZACAS-NEXT: bge a7, a1, .LBB113_3 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1 +; RV32IA-TSO-NOZACAS-NEXT: xor a6, a4, a1 ; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3 -; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6 +; RV32IA-TSO-NOZACAS-NEXT: xor a6, a4, a6 ; RV32IA-TSO-NOZACAS-NEXT: 
.LBB113_3: # in Loop: Header=BB113_1 Depth=1 ; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2) ; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB113_1 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.4: -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0 +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_max_i16_acq_rel: @@ -26054,30 +26054,30 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-ZALRSC-NEXT: andi a2, a0, -4 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: slli a1, a1, 48 -; RV64I-ZALRSC-NEXT: li a4, 48 -; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 ; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: srai a1, a1, 48 ; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a5, 48 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 -; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: sub a5, a5, a4 ; RV64I-ZALRSC-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) -; RV64I-ZALRSC-NEXT: and a7, a5, a3 -; RV64I-ZALRSC-NEXT: mv a6, a5 -; RV64I-ZALRSC-NEXT: sll a7, a7, a4 -; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: and a7, a4, a3 +; RV64I-ZALRSC-NEXT: mv a6, a4 +; RV64I-ZALRSC-NEXT: sll a7, a7, a5 +; RV64I-ZALRSC-NEXT: sra a7, a7, a5 ; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB113_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: xor a6, a4, a1 ; RV64I-ZALRSC-NEXT: and a6, a6, a3 -; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: xor a6, a4, a6 ; RV64I-ZALRSC-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1 ; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) ; RV64I-ZALRSC-NEXT: bnez a6, .LBB113_1 ; RV64I-ZALRSC-NEXT: # %bb.4: -; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; 
RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acq_rel: @@ -26085,30 +26085,30 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 48 -; RV64IA-WMO-NOZACAS-NEXT: li a4, 48 -; RV64IA-WMO-NOZACAS-NEXT: andi a5, a0, 24 +; RV64IA-WMO-NOZACAS-NEXT: andi a4, a0, 24 ; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: srai a1, a1, 48 ; RV64IA-WMO-NOZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-NOZACAS-NEXT: srai a1, a1, 48 +; RV64IA-WMO-NOZACAS-NEXT: li a5, 48 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: sub a4, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: sub a5, a5, a4 ; RV64IA-WMO-NOZACAS-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a5, (a2) -; RV64IA-WMO-NOZACAS-NEXT: and a7, a5, a3 -; RV64IA-WMO-NOZACAS-NEXT: mv a6, a5 -; RV64IA-WMO-NOZACAS-NEXT: sll a7, a7, a4 -; RV64IA-WMO-NOZACAS-NEXT: sra a7, a7, a4 +; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2) +; RV64IA-WMO-NOZACAS-NEXT: and a7, a4, a3 +; RV64IA-WMO-NOZACAS-NEXT: mv a6, a4 +; RV64IA-WMO-NOZACAS-NEXT: sll a7, a7, a5 +; RV64IA-WMO-NOZACAS-NEXT: sra a7, a7, a5 ; RV64IA-WMO-NOZACAS-NEXT: bge a7, a1, .LBB113_3 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: xor a6, a5, a1 +; RV64IA-WMO-NOZACAS-NEXT: xor a6, a4, a1 ; RV64IA-WMO-NOZACAS-NEXT: and a6, a6, a3 -; RV64IA-WMO-NOZACAS-NEXT: xor a6, a5, a6 +; RV64IA-WMO-NOZACAS-NEXT: xor a6, a4, a6 ; RV64IA-WMO-NOZACAS-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1 ; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a6, a6, (a2) ; RV64IA-WMO-NOZACAS-NEXT: bnez a6, .LBB113_1 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.4: -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-NOZACAS-NEXT: 
ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_max_i16_acq_rel: @@ -26116,30 +26116,30 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 48 -; RV64IA-TSO-NOZACAS-NEXT: li a4, 48 -; RV64IA-TSO-NOZACAS-NEXT: andi a5, a0, 24 +; RV64IA-TSO-NOZACAS-NEXT: andi a4, a0, 24 ; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: srai a1, a1, 48 ; RV64IA-TSO-NOZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-NOZACAS-NEXT: srai a1, a1, 48 +; RV64IA-TSO-NOZACAS-NEXT: li a5, 48 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: sub a4, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: sub a5, a5, a4 ; RV64IA-TSO-NOZACAS-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: lr.w a5, (a2) -; RV64IA-TSO-NOZACAS-NEXT: and a7, a5, a3 -; RV64IA-TSO-NOZACAS-NEXT: mv a6, a5 -; RV64IA-TSO-NOZACAS-NEXT: sll a7, a7, a4 -; RV64IA-TSO-NOZACAS-NEXT: sra a7, a7, a4 +; RV64IA-TSO-NOZACAS-NEXT: lr.w a4, (a2) +; RV64IA-TSO-NOZACAS-NEXT: and a7, a4, a3 +; RV64IA-TSO-NOZACAS-NEXT: mv a6, a4 +; RV64IA-TSO-NOZACAS-NEXT: sll a7, a7, a5 +; RV64IA-TSO-NOZACAS-NEXT: sra a7, a7, a5 ; RV64IA-TSO-NOZACAS-NEXT: bge a7, a1, .LBB113_3 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: xor a6, a5, a1 +; RV64IA-TSO-NOZACAS-NEXT: xor a6, a4, a1 ; RV64IA-TSO-NOZACAS-NEXT: and a6, a6, a3 -; RV64IA-TSO-NOZACAS-NEXT: xor a6, a5, a6 +; RV64IA-TSO-NOZACAS-NEXT: xor a6, a4, a6 ; RV64IA-TSO-NOZACAS-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1 ; RV64IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2) ; RV64IA-TSO-NOZACAS-NEXT: bnez a6, .LBB113_1 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.4: -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; 
RV32IA-WMO-ZACAS-LABEL: atomicrmw_max_i16_acq_rel: @@ -26147,30 +26147,30 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16 -; RV32IA-WMO-ZACAS-NEXT: li a4, 16 -; RV32IA-WMO-ZACAS-NEXT: andi a5, a0, 24 +; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 24 ; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 16 ; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 16 +; RV32IA-WMO-ZACAS-NEXT: li a5, 16 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: sub a4, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: sub a5, a5, a4 ; RV32IA-WMO-ZACAS-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a5, (a2) -; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3 -; RV32IA-WMO-ZACAS-NEXT: mv a6, a5 -; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4 -; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2) +; RV32IA-WMO-ZACAS-NEXT: and a7, a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a5 +; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a5 ; RV32IA-WMO-ZACAS-NEXT: bge a7, a1, .LBB113_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1 +; RV32IA-WMO-ZACAS-NEXT: xor a6, a4, a1 ; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3 -; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6 +; RV32IA-WMO-ZACAS-NEXT: xor a6, a4, a6 ; RV32IA-WMO-ZACAS-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1 ; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a6, a6, (a2) ; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB113_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.4: -; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0 +; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_max_i16_acq_rel: @@ -26178,30 +26178,30 @@ define i16 
@atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16 -; RV32IA-TSO-ZACAS-NEXT: li a4, 16 -; RV32IA-TSO-ZACAS-NEXT: andi a5, a0, 24 +; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 24 ; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 16 ; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 16 +; RV32IA-TSO-ZACAS-NEXT: li a5, 16 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: sub a4, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: sub a5, a5, a4 ; RV32IA-TSO-ZACAS-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2) -; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3 -; RV32IA-TSO-ZACAS-NEXT: mv a6, a5 -; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4 -; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4 +; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2) +; RV32IA-TSO-ZACAS-NEXT: and a7, a4, a3 +; RV32IA-TSO-ZACAS-NEXT: mv a6, a4 +; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a5 +; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a5 ; RV32IA-TSO-ZACAS-NEXT: bge a7, a1, .LBB113_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1 +; RV32IA-TSO-ZACAS-NEXT: xor a6, a4, a1 ; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3 -; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6 +; RV32IA-TSO-ZACAS-NEXT: xor a6, a4, a6 ; RV32IA-TSO-ZACAS-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1 ; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2) ; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB113_1 ; RV32IA-TSO-ZACAS-NEXT: # %bb.4: -; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0 +; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_max_i16_acq_rel: @@ -26209,30 +26209,30 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 ; 
RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 48 -; RV64IA-WMO-ZACAS-NEXT: li a4, 48 -; RV64IA-WMO-ZACAS-NEXT: andi a5, a0, 24 +; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, 24 ; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: srai a1, a1, 48 ; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-ZACAS-NEXT: srai a1, a1, 48 +; RV64IA-WMO-ZACAS-NEXT: li a5, 48 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: sub a4, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: sub a5, a5, a4 ; RV64IA-WMO-ZACAS-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a5, (a2) -; RV64IA-WMO-ZACAS-NEXT: and a7, a5, a3 -; RV64IA-WMO-ZACAS-NEXT: mv a6, a5 -; RV64IA-WMO-ZACAS-NEXT: sll a7, a7, a4 -; RV64IA-WMO-ZACAS-NEXT: sra a7, a7, a4 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2) +; RV64IA-WMO-ZACAS-NEXT: and a7, a4, a3 +; RV64IA-WMO-ZACAS-NEXT: mv a6, a4 +; RV64IA-WMO-ZACAS-NEXT: sll a7, a7, a5 +; RV64IA-WMO-ZACAS-NEXT: sra a7, a7, a5 ; RV64IA-WMO-ZACAS-NEXT: bge a7, a1, .LBB113_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a6, a5, a1 +; RV64IA-WMO-ZACAS-NEXT: xor a6, a4, a1 ; RV64IA-WMO-ZACAS-NEXT: and a6, a6, a3 -; RV64IA-WMO-ZACAS-NEXT: xor a6, a5, a6 +; RV64IA-WMO-ZACAS-NEXT: xor a6, a4, a6 ; RV64IA-WMO-ZACAS-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1 ; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a6, a6, (a2) ; RV64IA-WMO-ZACAS-NEXT: bnez a6, .LBB113_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.4: -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_max_i16_acq_rel: @@ -26240,30 +26240,30 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: slli 
a1, a1, 48 -; RV64IA-TSO-ZACAS-NEXT: li a4, 48 -; RV64IA-TSO-ZACAS-NEXT: andi a5, a0, 24 +; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, 24 ; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: srai a1, a1, 48 ; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-ZACAS-NEXT: srai a1, a1, 48 +; RV64IA-TSO-ZACAS-NEXT: li a5, 48 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: sub a4, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: sub a5, a5, a4 ; RV64IA-TSO-ZACAS-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a5, (a2) -; RV64IA-TSO-ZACAS-NEXT: and a7, a5, a3 -; RV64IA-TSO-ZACAS-NEXT: mv a6, a5 -; RV64IA-TSO-ZACAS-NEXT: sll a7, a7, a4 -; RV64IA-TSO-ZACAS-NEXT: sra a7, a7, a4 +; RV64IA-TSO-ZACAS-NEXT: lr.w a4, (a2) +; RV64IA-TSO-ZACAS-NEXT: and a7, a4, a3 +; RV64IA-TSO-ZACAS-NEXT: mv a6, a4 +; RV64IA-TSO-ZACAS-NEXT: sll a7, a7, a5 +; RV64IA-TSO-ZACAS-NEXT: sra a7, a7, a5 ; RV64IA-TSO-ZACAS-NEXT: bge a7, a1, .LBB113_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a6, a5, a1 +; RV64IA-TSO-ZACAS-NEXT: xor a6, a4, a1 ; RV64IA-TSO-ZACAS-NEXT: and a6, a6, a3 -; RV64IA-TSO-ZACAS-NEXT: xor a6, a5, a6 +; RV64IA-TSO-ZACAS-NEXT: xor a6, a4, a6 ; RV64IA-TSO-ZACAS-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1 ; RV64IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2) ; RV64IA-TSO-ZACAS-NEXT: bnez a6, .LBB113_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.4: -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_acq_rel: @@ -26337,30 +26337,30 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-ZALRSC-NEXT: andi a2, a0, -4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: slli a1, a1, 16 -; RV32I-ZALRSC-NEXT: li a4, 16 -; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 ; 
RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: srai a1, a1, 16 ; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a5, 16 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 -; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: sub a5, a5, a4 ; RV32I-ZALRSC-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) -; RV32I-ZALRSC-NEXT: and a7, a5, a3 -; RV32I-ZALRSC-NEXT: mv a6, a5 -; RV32I-ZALRSC-NEXT: sll a7, a7, a4 -; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV32I-ZALRSC-NEXT: and a7, a4, a3 +; RV32I-ZALRSC-NEXT: mv a6, a4 +; RV32I-ZALRSC-NEXT: sll a7, a7, a5 +; RV32I-ZALRSC-NEXT: sra a7, a7, a5 ; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB114_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB114_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: xor a6, a4, a1 ; RV32I-ZALRSC-NEXT: and a6, a6, a3 -; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: xor a6, a4, a6 ; RV32I-ZALRSC-NEXT: .LBB114_3: # in Loop: Header=BB114_1 Depth=1 ; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) ; RV32I-ZALRSC-NEXT: bnez a6, .LBB114_1 ; RV32I-ZALRSC-NEXT: # %bb.4: -; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: atomicrmw_max_i16_seq_cst: @@ -26368,30 +26368,30 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-NOZACAS-NEXT: slli a1, a1, 16 -; RV32IA-NOZACAS-NEXT: li a4, 16 -; RV32IA-NOZACAS-NEXT: andi a5, a0, 24 +; RV32IA-NOZACAS-NEXT: andi a4, a0, 24 ; RV32IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-NOZACAS-NEXT: srai a1, a1, 16 ; RV32IA-NOZACAS-NEXT: sll a3, a3, a0 +; RV32IA-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-NOZACAS-NEXT: srai a1, a1, 16 +; RV32IA-NOZACAS-NEXT: li a5, 16 ; RV32IA-NOZACAS-NEXT: sll 
a1, a1, a0 -; RV32IA-NOZACAS-NEXT: sub a4, a4, a5 +; RV32IA-NOZACAS-NEXT: sub a5, a5, a4 ; RV32IA-NOZACAS-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NOZACAS-NEXT: lr.w.aqrl a5, (a2) -; RV32IA-NOZACAS-NEXT: and a7, a5, a3 -; RV32IA-NOZACAS-NEXT: mv a6, a5 -; RV32IA-NOZACAS-NEXT: sll a7, a7, a4 -; RV32IA-NOZACAS-NEXT: sra a7, a7, a4 +; RV32IA-NOZACAS-NEXT: lr.w.aqrl a4, (a2) +; RV32IA-NOZACAS-NEXT: and a7, a4, a3 +; RV32IA-NOZACAS-NEXT: mv a6, a4 +; RV32IA-NOZACAS-NEXT: sll a7, a7, a5 +; RV32IA-NOZACAS-NEXT: sra a7, a7, a5 ; RV32IA-NOZACAS-NEXT: bge a7, a1, .LBB114_3 ; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB114_1 Depth=1 -; RV32IA-NOZACAS-NEXT: xor a6, a5, a1 +; RV32IA-NOZACAS-NEXT: xor a6, a4, a1 ; RV32IA-NOZACAS-NEXT: and a6, a6, a3 -; RV32IA-NOZACAS-NEXT: xor a6, a5, a6 +; RV32IA-NOZACAS-NEXT: xor a6, a4, a6 ; RV32IA-NOZACAS-NEXT: .LBB114_3: # in Loop: Header=BB114_1 Depth=1 ; RV32IA-NOZACAS-NEXT: sc.w.rl a6, a6, (a2) ; RV32IA-NOZACAS-NEXT: bnez a6, .LBB114_1 ; RV32IA-NOZACAS-NEXT: # %bb.4: -; RV32IA-NOZACAS-NEXT: srl a0, a5, a0 +; RV32IA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_max_i16_seq_cst: @@ -26441,30 +26441,30 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-ZALRSC-NEXT: andi a2, a0, -4 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: slli a1, a1, 48 -; RV64I-ZALRSC-NEXT: li a4, 48 -; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 ; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: srai a1, a1, 48 ; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a5, 48 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 -; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: sub a5, a5, a4 ; RV64I-ZALRSC-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) -; RV64I-ZALRSC-NEXT: and a7, 
a5, a3 -; RV64I-ZALRSC-NEXT: mv a6, a5 -; RV64I-ZALRSC-NEXT: sll a7, a7, a4 -; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV64I-ZALRSC-NEXT: and a7, a4, a3 +; RV64I-ZALRSC-NEXT: mv a6, a4 +; RV64I-ZALRSC-NEXT: sll a7, a7, a5 +; RV64I-ZALRSC-NEXT: sra a7, a7, a5 ; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB114_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB114_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: xor a6, a4, a1 ; RV64I-ZALRSC-NEXT: and a6, a6, a3 -; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: xor a6, a4, a6 ; RV64I-ZALRSC-NEXT: .LBB114_3: # in Loop: Header=BB114_1 Depth=1 ; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) ; RV64I-ZALRSC-NEXT: bnez a6, .LBB114_1 ; RV64I-ZALRSC-NEXT: # %bb.4: -; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_max_i16_seq_cst: @@ -26472,30 +26472,30 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-NOZACAS-NEXT: slli a1, a1, 48 -; RV64IA-NOZACAS-NEXT: li a4, 48 -; RV64IA-NOZACAS-NEXT: andi a5, a0, 24 +; RV64IA-NOZACAS-NEXT: andi a4, a0, 24 ; RV64IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-NOZACAS-NEXT: srai a1, a1, 48 ; RV64IA-NOZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-NOZACAS-NEXT: srai a1, a1, 48 +; RV64IA-NOZACAS-NEXT: li a5, 48 ; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-NOZACAS-NEXT: sub a4, a4, a5 +; RV64IA-NOZACAS-NEXT: sub a5, a5, a4 ; RV64IA-NOZACAS-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NOZACAS-NEXT: lr.w.aqrl a5, (a2) -; RV64IA-NOZACAS-NEXT: and a7, a5, a3 -; RV64IA-NOZACAS-NEXT: mv a6, a5 -; RV64IA-NOZACAS-NEXT: sll a7, a7, a4 -; RV64IA-NOZACAS-NEXT: sra a7, a7, a4 +; RV64IA-NOZACAS-NEXT: lr.w.aqrl a4, (a2) +; RV64IA-NOZACAS-NEXT: and a7, a4, a3 +; RV64IA-NOZACAS-NEXT: mv 
a6, a4 +; RV64IA-NOZACAS-NEXT: sll a7, a7, a5 +; RV64IA-NOZACAS-NEXT: sra a7, a7, a5 ; RV64IA-NOZACAS-NEXT: bge a7, a1, .LBB114_3 ; RV64IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB114_1 Depth=1 -; RV64IA-NOZACAS-NEXT: xor a6, a5, a1 +; RV64IA-NOZACAS-NEXT: xor a6, a4, a1 ; RV64IA-NOZACAS-NEXT: and a6, a6, a3 -; RV64IA-NOZACAS-NEXT: xor a6, a5, a6 +; RV64IA-NOZACAS-NEXT: xor a6, a4, a6 ; RV64IA-NOZACAS-NEXT: .LBB114_3: # in Loop: Header=BB114_1 Depth=1 ; RV64IA-NOZACAS-NEXT: sc.w.rl a6, a6, (a2) ; RV64IA-NOZACAS-NEXT: bnez a6, .LBB114_1 ; RV64IA-NOZACAS-NEXT: # %bb.4: -; RV64IA-NOZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_max_i16_seq_cst: @@ -26503,30 +26503,30 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32IA-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-ZACAS-NEXT: lui a3, 16 -; RV32IA-ZACAS-NEXT: slli a1, a1, 16 -; RV32IA-ZACAS-NEXT: li a4, 16 -; RV32IA-ZACAS-NEXT: andi a5, a0, 24 +; RV32IA-ZACAS-NEXT: andi a4, a0, 24 ; RV32IA-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-ZACAS-NEXT: srai a1, a1, 16 ; RV32IA-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-ZACAS-NEXT: srai a1, a1, 16 +; RV32IA-ZACAS-NEXT: li a5, 16 ; RV32IA-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-ZACAS-NEXT: sub a4, a4, a5 +; RV32IA-ZACAS-NEXT: sub a5, a5, a4 ; RV32IA-ZACAS-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-ZACAS-NEXT: lr.w.aqrl a5, (a2) -; RV32IA-ZACAS-NEXT: and a7, a5, a3 -; RV32IA-ZACAS-NEXT: mv a6, a5 -; RV32IA-ZACAS-NEXT: sll a7, a7, a4 -; RV32IA-ZACAS-NEXT: sra a7, a7, a4 +; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a2) +; RV32IA-ZACAS-NEXT: and a7, a4, a3 +; RV32IA-ZACAS-NEXT: mv a6, a4 +; RV32IA-ZACAS-NEXT: sll a7, a7, a5 +; RV32IA-ZACAS-NEXT: sra a7, a7, a5 ; RV32IA-ZACAS-NEXT: bge a7, a1, .LBB114_3 ; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB114_1 Depth=1 -; RV32IA-ZACAS-NEXT: xor a6, a5, a1 +; 
RV32IA-ZACAS-NEXT: xor a6, a4, a1 ; RV32IA-ZACAS-NEXT: and a6, a6, a3 -; RV32IA-ZACAS-NEXT: xor a6, a5, a6 +; RV32IA-ZACAS-NEXT: xor a6, a4, a6 ; RV32IA-ZACAS-NEXT: .LBB114_3: # in Loop: Header=BB114_1 Depth=1 ; RV32IA-ZACAS-NEXT: sc.w.rl a6, a6, (a2) ; RV32IA-ZACAS-NEXT: bnez a6, .LBB114_1 ; RV32IA-ZACAS-NEXT: # %bb.4: -; RV32IA-ZACAS-NEXT: srl a0, a5, a0 +; RV32IA-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_max_i16_seq_cst: @@ -26534,30 +26534,30 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64IA-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-ZACAS-NEXT: lui a3, 16 -; RV64IA-ZACAS-NEXT: slli a1, a1, 48 -; RV64IA-ZACAS-NEXT: li a4, 48 -; RV64IA-ZACAS-NEXT: andi a5, a0, 24 +; RV64IA-ZACAS-NEXT: andi a4, a0, 24 ; RV64IA-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-ZACAS-NEXT: srai a1, a1, 48 ; RV64IA-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-ZACAS-NEXT: srai a1, a1, 48 +; RV64IA-ZACAS-NEXT: li a5, 48 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-ZACAS-NEXT: sub a4, a4, a5 +; RV64IA-ZACAS-NEXT: sub a5, a5, a4 ; RV64IA-ZACAS-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w.aqrl a5, (a2) -; RV64IA-ZACAS-NEXT: and a7, a5, a3 -; RV64IA-ZACAS-NEXT: mv a6, a5 -; RV64IA-ZACAS-NEXT: sll a7, a7, a4 -; RV64IA-ZACAS-NEXT: sra a7, a7, a4 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a4, (a2) +; RV64IA-ZACAS-NEXT: and a7, a4, a3 +; RV64IA-ZACAS-NEXT: mv a6, a4 +; RV64IA-ZACAS-NEXT: sll a7, a7, a5 +; RV64IA-ZACAS-NEXT: sra a7, a7, a5 ; RV64IA-ZACAS-NEXT: bge a7, a1, .LBB114_3 ; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB114_1 Depth=1 -; RV64IA-ZACAS-NEXT: xor a6, a5, a1 +; RV64IA-ZACAS-NEXT: xor a6, a4, a1 ; RV64IA-ZACAS-NEXT: and a6, a6, a3 -; RV64IA-ZACAS-NEXT: xor a6, a5, a6 +; RV64IA-ZACAS-NEXT: xor a6, a4, a6 ; RV64IA-ZACAS-NEXT: .LBB114_3: # in Loop: Header=BB114_1 Depth=1 ; RV64IA-ZACAS-NEXT: sc.w.rl a6, a6, (a2) ; 
RV64IA-ZACAS-NEXT: bnez a6, .LBB114_1 ; RV64IA-ZACAS-NEXT: # %bb.4: -; RV64IA-ZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_seq_cst: @@ -26631,30 +26631,30 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-ZALRSC-NEXT: andi a2, a0, -4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: slli a1, a1, 16 -; RV32I-ZALRSC-NEXT: li a4, 16 -; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 ; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: srai a1, a1, 16 ; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a5, 16 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 -; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: sub a5, a5, a4 ; RV32I-ZALRSC-NEXT: .LBB115_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a5, (a2) -; RV32I-ZALRSC-NEXT: and a7, a5, a3 -; RV32I-ZALRSC-NEXT: mv a6, a5 -; RV32I-ZALRSC-NEXT: sll a7, a7, a4 -; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a7, a4, a3 +; RV32I-ZALRSC-NEXT: mv a6, a4 +; RV32I-ZALRSC-NEXT: sll a7, a7, a5 +; RV32I-ZALRSC-NEXT: sra a7, a7, a5 ; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB115_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB115_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: xor a6, a4, a1 ; RV32I-ZALRSC-NEXT: and a6, a6, a3 -; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: xor a6, a4, a6 ; RV32I-ZALRSC-NEXT: .LBB115_3: # in Loop: Header=BB115_1 Depth=1 ; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) ; RV32I-ZALRSC-NEXT: bnez a6, .LBB115_1 ; RV32I-ZALRSC-NEXT: # %bb.4: -; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: atomicrmw_min_i16_monotonic: @@ -26662,30 +26662,30 @@ define i16 
@atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-NOZACAS-NEXT: slli a1, a1, 16 -; RV32IA-NOZACAS-NEXT: li a4, 16 -; RV32IA-NOZACAS-NEXT: andi a5, a0, 24 +; RV32IA-NOZACAS-NEXT: andi a4, a0, 24 ; RV32IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-NOZACAS-NEXT: srai a1, a1, 16 ; RV32IA-NOZACAS-NEXT: sll a3, a3, a0 +; RV32IA-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-NOZACAS-NEXT: srai a1, a1, 16 +; RV32IA-NOZACAS-NEXT: li a5, 16 ; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-NOZACAS-NEXT: sub a4, a4, a5 +; RV32IA-NOZACAS-NEXT: sub a5, a5, a4 ; RV32IA-NOZACAS-NEXT: .LBB115_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NOZACAS-NEXT: lr.w a5, (a2) -; RV32IA-NOZACAS-NEXT: and a7, a5, a3 -; RV32IA-NOZACAS-NEXT: mv a6, a5 -; RV32IA-NOZACAS-NEXT: sll a7, a7, a4 -; RV32IA-NOZACAS-NEXT: sra a7, a7, a4 +; RV32IA-NOZACAS-NEXT: lr.w a4, (a2) +; RV32IA-NOZACAS-NEXT: and a7, a4, a3 +; RV32IA-NOZACAS-NEXT: mv a6, a4 +; RV32IA-NOZACAS-NEXT: sll a7, a7, a5 +; RV32IA-NOZACAS-NEXT: sra a7, a7, a5 ; RV32IA-NOZACAS-NEXT: bge a1, a7, .LBB115_3 ; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB115_1 Depth=1 -; RV32IA-NOZACAS-NEXT: xor a6, a5, a1 +; RV32IA-NOZACAS-NEXT: xor a6, a4, a1 ; RV32IA-NOZACAS-NEXT: and a6, a6, a3 -; RV32IA-NOZACAS-NEXT: xor a6, a5, a6 +; RV32IA-NOZACAS-NEXT: xor a6, a4, a6 ; RV32IA-NOZACAS-NEXT: .LBB115_3: # in Loop: Header=BB115_1 Depth=1 ; RV32IA-NOZACAS-NEXT: sc.w a6, a6, (a2) ; RV32IA-NOZACAS-NEXT: bnez a6, .LBB115_1 ; RV32IA-NOZACAS-NEXT: # %bb.4: -; RV32IA-NOZACAS-NEXT: srl a0, a5, a0 +; RV32IA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_min_i16_monotonic: @@ -26735,30 +26735,30 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-ZALRSC-NEXT: andi a2, a0, -4 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: slli a1, a1, 48 -; 
RV64I-ZALRSC-NEXT: li a4, 48 -; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 ; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: srai a1, a1, 48 ; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a5, 48 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 -; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: sub a5, a5, a4 ; RV64I-ZALRSC-NEXT: .LBB115_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a5, (a2) -; RV64I-ZALRSC-NEXT: and a7, a5, a3 -; RV64I-ZALRSC-NEXT: mv a6, a5 -; RV64I-ZALRSC-NEXT: sll a7, a7, a4 -; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a7, a4, a3 +; RV64I-ZALRSC-NEXT: mv a6, a4 +; RV64I-ZALRSC-NEXT: sll a7, a7, a5 +; RV64I-ZALRSC-NEXT: sra a7, a7, a5 ; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB115_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB115_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: xor a6, a4, a1 ; RV64I-ZALRSC-NEXT: and a6, a6, a3 -; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: xor a6, a4, a6 ; RV64I-ZALRSC-NEXT: .LBB115_3: # in Loop: Header=BB115_1 Depth=1 ; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) ; RV64I-ZALRSC-NEXT: bnez a6, .LBB115_1 ; RV64I-ZALRSC-NEXT: # %bb.4: -; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_min_i16_monotonic: @@ -26766,30 +26766,30 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-NOZACAS-NEXT: slli a1, a1, 48 -; RV64IA-NOZACAS-NEXT: li a4, 48 -; RV64IA-NOZACAS-NEXT: andi a5, a0, 24 +; RV64IA-NOZACAS-NEXT: andi a4, a0, 24 ; RV64IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-NOZACAS-NEXT: srai a1, a1, 48 ; RV64IA-NOZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-NOZACAS-NEXT: slli a1, a1, 48 
+; RV64IA-NOZACAS-NEXT: srai a1, a1, 48 +; RV64IA-NOZACAS-NEXT: li a5, 48 ; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-NOZACAS-NEXT: sub a4, a4, a5 +; RV64IA-NOZACAS-NEXT: sub a5, a5, a4 ; RV64IA-NOZACAS-NEXT: .LBB115_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NOZACAS-NEXT: lr.w a5, (a2) -; RV64IA-NOZACAS-NEXT: and a7, a5, a3 -; RV64IA-NOZACAS-NEXT: mv a6, a5 -; RV64IA-NOZACAS-NEXT: sll a7, a7, a4 -; RV64IA-NOZACAS-NEXT: sra a7, a7, a4 +; RV64IA-NOZACAS-NEXT: lr.w a4, (a2) +; RV64IA-NOZACAS-NEXT: and a7, a4, a3 +; RV64IA-NOZACAS-NEXT: mv a6, a4 +; RV64IA-NOZACAS-NEXT: sll a7, a7, a5 +; RV64IA-NOZACAS-NEXT: sra a7, a7, a5 ; RV64IA-NOZACAS-NEXT: bge a1, a7, .LBB115_3 ; RV64IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB115_1 Depth=1 -; RV64IA-NOZACAS-NEXT: xor a6, a5, a1 +; RV64IA-NOZACAS-NEXT: xor a6, a4, a1 ; RV64IA-NOZACAS-NEXT: and a6, a6, a3 -; RV64IA-NOZACAS-NEXT: xor a6, a5, a6 +; RV64IA-NOZACAS-NEXT: xor a6, a4, a6 ; RV64IA-NOZACAS-NEXT: .LBB115_3: # in Loop: Header=BB115_1 Depth=1 ; RV64IA-NOZACAS-NEXT: sc.w a6, a6, (a2) ; RV64IA-NOZACAS-NEXT: bnez a6, .LBB115_1 ; RV64IA-NOZACAS-NEXT: # %bb.4: -; RV64IA-NOZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_min_i16_monotonic: @@ -26797,30 +26797,30 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-ZACAS-NEXT: lui a3, 16 -; RV32IA-ZACAS-NEXT: slli a1, a1, 16 -; RV32IA-ZACAS-NEXT: li a4, 16 -; RV32IA-ZACAS-NEXT: andi a5, a0, 24 +; RV32IA-ZACAS-NEXT: andi a4, a0, 24 ; RV32IA-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-ZACAS-NEXT: srai a1, a1, 16 ; RV32IA-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-ZACAS-NEXT: srai a1, a1, 16 +; RV32IA-ZACAS-NEXT: li a5, 16 ; RV32IA-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-ZACAS-NEXT: sub a4, a4, a5 +; RV32IA-ZACAS-NEXT: sub a5, a5, a4 ; RV32IA-ZACAS-NEXT: .LBB115_1: # 
=>This Inner Loop Header: Depth=1 -; RV32IA-ZACAS-NEXT: lr.w a5, (a2) -; RV32IA-ZACAS-NEXT: and a7, a5, a3 -; RV32IA-ZACAS-NEXT: mv a6, a5 -; RV32IA-ZACAS-NEXT: sll a7, a7, a4 -; RV32IA-ZACAS-NEXT: sra a7, a7, a4 +; RV32IA-ZACAS-NEXT: lr.w a4, (a2) +; RV32IA-ZACAS-NEXT: and a7, a4, a3 +; RV32IA-ZACAS-NEXT: mv a6, a4 +; RV32IA-ZACAS-NEXT: sll a7, a7, a5 +; RV32IA-ZACAS-NEXT: sra a7, a7, a5 ; RV32IA-ZACAS-NEXT: bge a1, a7, .LBB115_3 ; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB115_1 Depth=1 -; RV32IA-ZACAS-NEXT: xor a6, a5, a1 +; RV32IA-ZACAS-NEXT: xor a6, a4, a1 ; RV32IA-ZACAS-NEXT: and a6, a6, a3 -; RV32IA-ZACAS-NEXT: xor a6, a5, a6 +; RV32IA-ZACAS-NEXT: xor a6, a4, a6 ; RV32IA-ZACAS-NEXT: .LBB115_3: # in Loop: Header=BB115_1 Depth=1 ; RV32IA-ZACAS-NEXT: sc.w a6, a6, (a2) ; RV32IA-ZACAS-NEXT: bnez a6, .LBB115_1 ; RV32IA-ZACAS-NEXT: # %bb.4: -; RV32IA-ZACAS-NEXT: srl a0, a5, a0 +; RV32IA-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_min_i16_monotonic: @@ -26828,30 +26828,30 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-ZACAS-NEXT: lui a3, 16 -; RV64IA-ZACAS-NEXT: slli a1, a1, 48 -; RV64IA-ZACAS-NEXT: li a4, 48 -; RV64IA-ZACAS-NEXT: andi a5, a0, 24 +; RV64IA-ZACAS-NEXT: andi a4, a0, 24 ; RV64IA-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-ZACAS-NEXT: srai a1, a1, 48 ; RV64IA-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-ZACAS-NEXT: srai a1, a1, 48 +; RV64IA-ZACAS-NEXT: li a5, 48 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-ZACAS-NEXT: sub a4, a4, a5 +; RV64IA-ZACAS-NEXT: sub a5, a5, a4 ; RV64IA-ZACAS-NEXT: .LBB115_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w a5, (a2) -; RV64IA-ZACAS-NEXT: and a7, a5, a3 -; RV64IA-ZACAS-NEXT: mv a6, a5 -; RV64IA-ZACAS-NEXT: sll a7, a7, a4 -; RV64IA-ZACAS-NEXT: sra a7, a7, a4 +; RV64IA-ZACAS-NEXT: lr.w a4, (a2) +; RV64IA-ZACAS-NEXT: and a7, 
a4, a3 +; RV64IA-ZACAS-NEXT: mv a6, a4 +; RV64IA-ZACAS-NEXT: sll a7, a7, a5 +; RV64IA-ZACAS-NEXT: sra a7, a7, a5 ; RV64IA-ZACAS-NEXT: bge a1, a7, .LBB115_3 ; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB115_1 Depth=1 -; RV64IA-ZACAS-NEXT: xor a6, a5, a1 +; RV64IA-ZACAS-NEXT: xor a6, a4, a1 ; RV64IA-ZACAS-NEXT: and a6, a6, a3 -; RV64IA-ZACAS-NEXT: xor a6, a5, a6 +; RV64IA-ZACAS-NEXT: xor a6, a4, a6 ; RV64IA-ZACAS-NEXT: .LBB115_3: # in Loop: Header=BB115_1 Depth=1 ; RV64IA-ZACAS-NEXT: sc.w a6, a6, (a2) ; RV64IA-ZACAS-NEXT: bnez a6, .LBB115_1 ; RV64IA-ZACAS-NEXT: # %bb.4: -; RV64IA-ZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_monotonic: @@ -26925,30 +26925,30 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-ZALRSC-NEXT: andi a2, a0, -4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: slli a1, a1, 16 -; RV32I-ZALRSC-NEXT: li a4, 16 -; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 ; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: srai a1, a1, 16 ; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a5, 16 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 -; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: sub a5, a5, a4 ; RV32I-ZALRSC-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) -; RV32I-ZALRSC-NEXT: and a7, a5, a3 -; RV32I-ZALRSC-NEXT: mv a6, a5 -; RV32I-ZALRSC-NEXT: sll a7, a7, a4 -; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: and a7, a4, a3 +; RV32I-ZALRSC-NEXT: mv a6, a4 +; RV32I-ZALRSC-NEXT: sll a7, a7, a5 +; RV32I-ZALRSC-NEXT: sra a7, a7, a5 ; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB116_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; 
RV32I-ZALRSC-NEXT: xor a6, a4, a1 ; RV32I-ZALRSC-NEXT: and a6, a6, a3 -; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: xor a6, a4, a6 ; RV32I-ZALRSC-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1 ; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) ; RV32I-ZALRSC-NEXT: bnez a6, .LBB116_1 ; RV32I-ZALRSC-NEXT: # %bb.4: -; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acquire: @@ -26956,30 +26956,30 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16 -; RV32IA-WMO-NOZACAS-NEXT: li a4, 16 -; RV32IA-WMO-NOZACAS-NEXT: andi a5, a0, 24 +; RV32IA-WMO-NOZACAS-NEXT: andi a4, a0, 24 ; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 16 ; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: li a5, 16 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NOZACAS-NEXT: sub a4, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: sub a5, a5, a4 ; RV32IA-WMO-NOZACAS-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a5, (a2) -; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3 -; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5 -; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4 -; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4 +; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2) +; RV32IA-WMO-NOZACAS-NEXT: and a7, a4, a3 +; RV32IA-WMO-NOZACAS-NEXT: mv a6, a4 +; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a5 +; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a5 ; RV32IA-WMO-NOZACAS-NEXT: bge a1, a7, .LBB116_3 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1 +; RV32IA-WMO-NOZACAS-NEXT: xor a6, a4, a1 ; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3 -; 
RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6 +; RV32IA-WMO-NOZACAS-NEXT: xor a6, a4, a6 ; RV32IA-WMO-NOZACAS-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1 ; RV32IA-WMO-NOZACAS-NEXT: sc.w a6, a6, (a2) ; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB116_1 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.4: -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0 +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_min_i16_acquire: @@ -26987,30 +26987,30 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16 -; RV32IA-TSO-NOZACAS-NEXT: li a4, 16 -; RV32IA-TSO-NOZACAS-NEXT: andi a5, a0, 24 +; RV32IA-TSO-NOZACAS-NEXT: andi a4, a0, 24 ; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 16 ; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: li a5, 16 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: sub a4, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: sub a5, a5, a4 ; RV32IA-TSO-NOZACAS-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2) -; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3 -; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5 -; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4 -; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4 +; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2) +; RV32IA-TSO-NOZACAS-NEXT: and a7, a4, a3 +; RV32IA-TSO-NOZACAS-NEXT: mv a6, a4 +; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a5 +; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a5 ; RV32IA-TSO-NOZACAS-NEXT: bge a1, a7, .LBB116_3 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1 +; RV32IA-TSO-NOZACAS-NEXT: xor a6, a4, a1 ; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3 -; RV32IA-TSO-NOZACAS-NEXT: xor a6, 
a5, a6 +; RV32IA-TSO-NOZACAS-NEXT: xor a6, a4, a6 ; RV32IA-TSO-NOZACAS-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1 ; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2) ; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB116_1 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.4: -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0 +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_min_i16_acquire: @@ -27060,30 +27060,30 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-ZALRSC-NEXT: andi a2, a0, -4 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: slli a1, a1, 48 -; RV64I-ZALRSC-NEXT: li a4, 48 -; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 ; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: srai a1, a1, 48 ; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a5, 48 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 -; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: sub a5, a5, a4 ; RV64I-ZALRSC-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) -; RV64I-ZALRSC-NEXT: and a7, a5, a3 -; RV64I-ZALRSC-NEXT: mv a6, a5 -; RV64I-ZALRSC-NEXT: sll a7, a7, a4 -; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: and a7, a4, a3 +; RV64I-ZALRSC-NEXT: mv a6, a4 +; RV64I-ZALRSC-NEXT: sll a7, a7, a5 +; RV64I-ZALRSC-NEXT: sra a7, a7, a5 ; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB116_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: xor a6, a4, a1 ; RV64I-ZALRSC-NEXT: and a6, a6, a3 -; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: xor a6, a4, a6 ; RV64I-ZALRSC-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1 ; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) ; RV64I-ZALRSC-NEXT: bnez a6, .LBB116_1 ; RV64I-ZALRSC-NEXT: # %bb.4: -; 
RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acquire: @@ -27091,30 +27091,30 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 48 -; RV64IA-WMO-NOZACAS-NEXT: li a4, 48 -; RV64IA-WMO-NOZACAS-NEXT: andi a5, a0, 24 +; RV64IA-WMO-NOZACAS-NEXT: andi a4, a0, 24 ; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: srai a1, a1, 48 ; RV64IA-WMO-NOZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-NOZACAS-NEXT: srai a1, a1, 48 +; RV64IA-WMO-NOZACAS-NEXT: li a5, 48 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: sub a4, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: sub a5, a5, a4 ; RV64IA-WMO-NOZACAS-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a5, (a2) -; RV64IA-WMO-NOZACAS-NEXT: and a7, a5, a3 -; RV64IA-WMO-NOZACAS-NEXT: mv a6, a5 -; RV64IA-WMO-NOZACAS-NEXT: sll a7, a7, a4 -; RV64IA-WMO-NOZACAS-NEXT: sra a7, a7, a4 +; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2) +; RV64IA-WMO-NOZACAS-NEXT: and a7, a4, a3 +; RV64IA-WMO-NOZACAS-NEXT: mv a6, a4 +; RV64IA-WMO-NOZACAS-NEXT: sll a7, a7, a5 +; RV64IA-WMO-NOZACAS-NEXT: sra a7, a7, a5 ; RV64IA-WMO-NOZACAS-NEXT: bge a1, a7, .LBB116_3 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: xor a6, a5, a1 +; RV64IA-WMO-NOZACAS-NEXT: xor a6, a4, a1 ; RV64IA-WMO-NOZACAS-NEXT: and a6, a6, a3 -; RV64IA-WMO-NOZACAS-NEXT: xor a6, a5, a6 +; RV64IA-WMO-NOZACAS-NEXT: xor a6, a4, a6 ; RV64IA-WMO-NOZACAS-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1 ; RV64IA-WMO-NOZACAS-NEXT: sc.w a6, a6, (a2) ; RV64IA-WMO-NOZACAS-NEXT: bnez a6, .LBB116_1 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.4: -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a5, a0 
+; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_min_i16_acquire: @@ -27122,30 +27122,30 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 48 -; RV64IA-TSO-NOZACAS-NEXT: li a4, 48 -; RV64IA-TSO-NOZACAS-NEXT: andi a5, a0, 24 +; RV64IA-TSO-NOZACAS-NEXT: andi a4, a0, 24 ; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: srai a1, a1, 48 ; RV64IA-TSO-NOZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-NOZACAS-NEXT: srai a1, a1, 48 +; RV64IA-TSO-NOZACAS-NEXT: li a5, 48 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: sub a4, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: sub a5, a5, a4 ; RV64IA-TSO-NOZACAS-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: lr.w a5, (a2) -; RV64IA-TSO-NOZACAS-NEXT: and a7, a5, a3 -; RV64IA-TSO-NOZACAS-NEXT: mv a6, a5 -; RV64IA-TSO-NOZACAS-NEXT: sll a7, a7, a4 -; RV64IA-TSO-NOZACAS-NEXT: sra a7, a7, a4 +; RV64IA-TSO-NOZACAS-NEXT: lr.w a4, (a2) +; RV64IA-TSO-NOZACAS-NEXT: and a7, a4, a3 +; RV64IA-TSO-NOZACAS-NEXT: mv a6, a4 +; RV64IA-TSO-NOZACAS-NEXT: sll a7, a7, a5 +; RV64IA-TSO-NOZACAS-NEXT: sra a7, a7, a5 ; RV64IA-TSO-NOZACAS-NEXT: bge a1, a7, .LBB116_3 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: xor a6, a5, a1 +; RV64IA-TSO-NOZACAS-NEXT: xor a6, a4, a1 ; RV64IA-TSO-NOZACAS-NEXT: and a6, a6, a3 -; RV64IA-TSO-NOZACAS-NEXT: xor a6, a5, a6 +; RV64IA-TSO-NOZACAS-NEXT: xor a6, a4, a6 ; RV64IA-TSO-NOZACAS-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1 ; RV64IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2) ; RV64IA-TSO-NOZACAS-NEXT: bnez a6, .LBB116_1 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.4: -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-TSO-NOZACAS-NEXT: 
srlw a0, a4, a0 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_min_i16_acquire: @@ -27153,30 +27153,30 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16 -; RV32IA-WMO-ZACAS-NEXT: li a4, 16 -; RV32IA-WMO-ZACAS-NEXT: andi a5, a0, 24 +; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 24 ; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 16 ; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 16 +; RV32IA-WMO-ZACAS-NEXT: li a5, 16 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: sub a4, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: sub a5, a5, a4 ; RV32IA-WMO-ZACAS-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a5, (a2) -; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3 -; RV32IA-WMO-ZACAS-NEXT: mv a6, a5 -; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4 -; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2) +; RV32IA-WMO-ZACAS-NEXT: and a7, a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a5 +; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a5 ; RV32IA-WMO-ZACAS-NEXT: bge a1, a7, .LBB116_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1 +; RV32IA-WMO-ZACAS-NEXT: xor a6, a4, a1 ; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3 -; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6 +; RV32IA-WMO-ZACAS-NEXT: xor a6, a4, a6 ; RV32IA-WMO-ZACAS-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1 ; RV32IA-WMO-ZACAS-NEXT: sc.w a6, a6, (a2) ; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB116_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.4: -; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0 +; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_min_i16_acquire: @@ 
-27184,30 +27184,30 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16 -; RV32IA-TSO-ZACAS-NEXT: li a4, 16 -; RV32IA-TSO-ZACAS-NEXT: andi a5, a0, 24 +; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 24 ; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 16 ; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 16 +; RV32IA-TSO-ZACAS-NEXT: li a5, 16 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: sub a4, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: sub a5, a5, a4 ; RV32IA-TSO-ZACAS-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2) -; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3 -; RV32IA-TSO-ZACAS-NEXT: mv a6, a5 -; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4 -; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4 +; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2) +; RV32IA-TSO-ZACAS-NEXT: and a7, a4, a3 +; RV32IA-TSO-ZACAS-NEXT: mv a6, a4 +; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a5 +; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a5 ; RV32IA-TSO-ZACAS-NEXT: bge a1, a7, .LBB116_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1 +; RV32IA-TSO-ZACAS-NEXT: xor a6, a4, a1 ; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3 -; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6 +; RV32IA-TSO-ZACAS-NEXT: xor a6, a4, a6 ; RV32IA-TSO-ZACAS-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1 ; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2) ; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB116_1 ; RV32IA-TSO-ZACAS-NEXT: # %bb.4: -; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0 +; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_min_i16_acquire: @@ -27215,30 +27215,30 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-ZACAS-NEXT: andi 
a2, a0, -4 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 48 -; RV64IA-WMO-ZACAS-NEXT: li a4, 48 -; RV64IA-WMO-ZACAS-NEXT: andi a5, a0, 24 +; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, 24 ; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: srai a1, a1, 48 ; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-ZACAS-NEXT: srai a1, a1, 48 +; RV64IA-WMO-ZACAS-NEXT: li a5, 48 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: sub a4, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: sub a5, a5, a4 ; RV64IA-WMO-ZACAS-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a5, (a2) -; RV64IA-WMO-ZACAS-NEXT: and a7, a5, a3 -; RV64IA-WMO-ZACAS-NEXT: mv a6, a5 -; RV64IA-WMO-ZACAS-NEXT: sll a7, a7, a4 -; RV64IA-WMO-ZACAS-NEXT: sra a7, a7, a4 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2) +; RV64IA-WMO-ZACAS-NEXT: and a7, a4, a3 +; RV64IA-WMO-ZACAS-NEXT: mv a6, a4 +; RV64IA-WMO-ZACAS-NEXT: sll a7, a7, a5 +; RV64IA-WMO-ZACAS-NEXT: sra a7, a7, a5 ; RV64IA-WMO-ZACAS-NEXT: bge a1, a7, .LBB116_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a6, a5, a1 +; RV64IA-WMO-ZACAS-NEXT: xor a6, a4, a1 ; RV64IA-WMO-ZACAS-NEXT: and a6, a6, a3 -; RV64IA-WMO-ZACAS-NEXT: xor a6, a5, a6 +; RV64IA-WMO-ZACAS-NEXT: xor a6, a4, a6 ; RV64IA-WMO-ZACAS-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1 ; RV64IA-WMO-ZACAS-NEXT: sc.w a6, a6, (a2) ; RV64IA-WMO-ZACAS-NEXT: bnez a6, .LBB116_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.4: -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_min_i16_acquire: @@ -27246,30 +27246,30 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; 
RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 48 -; RV64IA-TSO-ZACAS-NEXT: li a4, 48 -; RV64IA-TSO-ZACAS-NEXT: andi a5, a0, 24 +; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, 24 ; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: srai a1, a1, 48 ; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-ZACAS-NEXT: srai a1, a1, 48 +; RV64IA-TSO-ZACAS-NEXT: li a5, 48 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: sub a4, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: sub a5, a5, a4 ; RV64IA-TSO-ZACAS-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a5, (a2) -; RV64IA-TSO-ZACAS-NEXT: and a7, a5, a3 -; RV64IA-TSO-ZACAS-NEXT: mv a6, a5 -; RV64IA-TSO-ZACAS-NEXT: sll a7, a7, a4 -; RV64IA-TSO-ZACAS-NEXT: sra a7, a7, a4 +; RV64IA-TSO-ZACAS-NEXT: lr.w a4, (a2) +; RV64IA-TSO-ZACAS-NEXT: and a7, a4, a3 +; RV64IA-TSO-ZACAS-NEXT: mv a6, a4 +; RV64IA-TSO-ZACAS-NEXT: sll a7, a7, a5 +; RV64IA-TSO-ZACAS-NEXT: sra a7, a7, a5 ; RV64IA-TSO-ZACAS-NEXT: bge a1, a7, .LBB116_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a6, a5, a1 +; RV64IA-TSO-ZACAS-NEXT: xor a6, a4, a1 ; RV64IA-TSO-ZACAS-NEXT: and a6, a6, a3 -; RV64IA-TSO-ZACAS-NEXT: xor a6, a5, a6 +; RV64IA-TSO-ZACAS-NEXT: xor a6, a4, a6 ; RV64IA-TSO-ZACAS-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1 ; RV64IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2) ; RV64IA-TSO-ZACAS-NEXT: bnez a6, .LBB116_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.4: -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_acquire: @@ -27343,30 +27343,30 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-ZALRSC-NEXT: andi a2, a0, -4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: slli a1, a1, 16 -; RV32I-ZALRSC-NEXT: li a4, 16 -; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: 
andi a4, a0, 24 ; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: srai a1, a1, 16 ; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a5, 16 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 -; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: sub a5, a5, a4 ; RV32I-ZALRSC-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a5, (a2) -; RV32I-ZALRSC-NEXT: and a7, a5, a3 -; RV32I-ZALRSC-NEXT: mv a6, a5 -; RV32I-ZALRSC-NEXT: sll a7, a7, a4 -; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a7, a4, a3 +; RV32I-ZALRSC-NEXT: mv a6, a4 +; RV32I-ZALRSC-NEXT: sll a7, a7, a5 +; RV32I-ZALRSC-NEXT: sra a7, a7, a5 ; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB117_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: xor a6, a4, a1 ; RV32I-ZALRSC-NEXT: and a6, a6, a3 -; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: xor a6, a4, a6 ; RV32I-ZALRSC-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1 ; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) ; RV32I-ZALRSC-NEXT: bnez a6, .LBB117_1 ; RV32I-ZALRSC-NEXT: # %bb.4: -; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_release: @@ -27374,30 +27374,30 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16 -; RV32IA-WMO-NOZACAS-NEXT: li a4, 16 -; RV32IA-WMO-NOZACAS-NEXT: andi a5, a0, 24 +; RV32IA-WMO-NOZACAS-NEXT: andi a4, a0, 24 ; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 16 ; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 16 +; 
RV32IA-WMO-NOZACAS-NEXT: li a5, 16 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NOZACAS-NEXT: sub a4, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: sub a5, a5, a4 ; RV32IA-WMO-NOZACAS-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: lr.w a5, (a2) -; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3 -; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5 -; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4 -; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4 +; RV32IA-WMO-NOZACAS-NEXT: lr.w a4, (a2) +; RV32IA-WMO-NOZACAS-NEXT: and a7, a4, a3 +; RV32IA-WMO-NOZACAS-NEXT: mv a6, a4 +; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a5 +; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a5 ; RV32IA-WMO-NOZACAS-NEXT: bge a1, a7, .LBB117_3 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1 +; RV32IA-WMO-NOZACAS-NEXT: xor a6, a4, a1 ; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3 -; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6 +; RV32IA-WMO-NOZACAS-NEXT: xor a6, a4, a6 ; RV32IA-WMO-NOZACAS-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1 ; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a6, a6, (a2) ; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB117_1 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.4: -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0 +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_min_i16_release: @@ -27405,30 +27405,30 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16 -; RV32IA-TSO-NOZACAS-NEXT: li a4, 16 -; RV32IA-TSO-NOZACAS-NEXT: andi a5, a0, 24 +; RV32IA-TSO-NOZACAS-NEXT: andi a4, a0, 24 ; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 16 ; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: li a5, 
16 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: sub a4, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: sub a5, a5, a4 ; RV32IA-TSO-NOZACAS-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2) -; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3 -; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5 -; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4 -; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4 +; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2) +; RV32IA-TSO-NOZACAS-NEXT: and a7, a4, a3 +; RV32IA-TSO-NOZACAS-NEXT: mv a6, a4 +; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a5 +; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a5 ; RV32IA-TSO-NOZACAS-NEXT: bge a1, a7, .LBB117_3 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1 +; RV32IA-TSO-NOZACAS-NEXT: xor a6, a4, a1 ; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3 -; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6 +; RV32IA-TSO-NOZACAS-NEXT: xor a6, a4, a6 ; RV32IA-TSO-NOZACAS-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1 ; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2) ; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB117_1 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.4: -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0 +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_min_i16_release: @@ -27478,30 +27478,30 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-ZALRSC-NEXT: andi a2, a0, -4 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: slli a1, a1, 48 -; RV64I-ZALRSC-NEXT: li a4, 48 -; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 ; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: srai a1, a1, 48 ; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a5, 48 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 -; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: sub a5, a5, a4 ; 
RV64I-ZALRSC-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a5, (a2) -; RV64I-ZALRSC-NEXT: and a7, a5, a3 -; RV64I-ZALRSC-NEXT: mv a6, a5 -; RV64I-ZALRSC-NEXT: sll a7, a7, a4 -; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a7, a4, a3 +; RV64I-ZALRSC-NEXT: mv a6, a4 +; RV64I-ZALRSC-NEXT: sll a7, a7, a5 +; RV64I-ZALRSC-NEXT: sra a7, a7, a5 ; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB117_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: xor a6, a4, a1 ; RV64I-ZALRSC-NEXT: and a6, a6, a3 -; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: xor a6, a4, a6 ; RV64I-ZALRSC-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1 ; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) ; RV64I-ZALRSC-NEXT: bnez a6, .LBB117_1 ; RV64I-ZALRSC-NEXT: # %bb.4: -; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_release: @@ -27509,30 +27509,30 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 48 -; RV64IA-WMO-NOZACAS-NEXT: li a4, 48 -; RV64IA-WMO-NOZACAS-NEXT: andi a5, a0, 24 +; RV64IA-WMO-NOZACAS-NEXT: andi a4, a0, 24 ; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: srai a1, a1, 48 ; RV64IA-WMO-NOZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-NOZACAS-NEXT: srai a1, a1, 48 +; RV64IA-WMO-NOZACAS-NEXT: li a5, 48 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: sub a4, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: sub a5, a5, a4 ; RV64IA-WMO-NOZACAS-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: lr.w a5, (a2) -; RV64IA-WMO-NOZACAS-NEXT: and a7, a5, a3 -; 
RV64IA-WMO-NOZACAS-NEXT: mv a6, a5 -; RV64IA-WMO-NOZACAS-NEXT: sll a7, a7, a4 -; RV64IA-WMO-NOZACAS-NEXT: sra a7, a7, a4 +; RV64IA-WMO-NOZACAS-NEXT: lr.w a4, (a2) +; RV64IA-WMO-NOZACAS-NEXT: and a7, a4, a3 +; RV64IA-WMO-NOZACAS-NEXT: mv a6, a4 +; RV64IA-WMO-NOZACAS-NEXT: sll a7, a7, a5 +; RV64IA-WMO-NOZACAS-NEXT: sra a7, a7, a5 ; RV64IA-WMO-NOZACAS-NEXT: bge a1, a7, .LBB117_3 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: xor a6, a5, a1 +; RV64IA-WMO-NOZACAS-NEXT: xor a6, a4, a1 ; RV64IA-WMO-NOZACAS-NEXT: and a6, a6, a3 -; RV64IA-WMO-NOZACAS-NEXT: xor a6, a5, a6 +; RV64IA-WMO-NOZACAS-NEXT: xor a6, a4, a6 ; RV64IA-WMO-NOZACAS-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1 ; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a6, a6, (a2) ; RV64IA-WMO-NOZACAS-NEXT: bnez a6, .LBB117_1 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.4: -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_min_i16_release: @@ -27540,30 +27540,30 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 48 -; RV64IA-TSO-NOZACAS-NEXT: li a4, 48 -; RV64IA-TSO-NOZACAS-NEXT: andi a5, a0, 24 +; RV64IA-TSO-NOZACAS-NEXT: andi a4, a0, 24 ; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: srai a1, a1, 48 ; RV64IA-TSO-NOZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-NOZACAS-NEXT: srai a1, a1, 48 +; RV64IA-TSO-NOZACAS-NEXT: li a5, 48 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: sub a4, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: sub a5, a5, a4 ; RV64IA-TSO-NOZACAS-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: lr.w a5, (a2) -; RV64IA-TSO-NOZACAS-NEXT: and a7, a5, a3 -; RV64IA-TSO-NOZACAS-NEXT: mv 
a6, a5 -; RV64IA-TSO-NOZACAS-NEXT: sll a7, a7, a4 -; RV64IA-TSO-NOZACAS-NEXT: sra a7, a7, a4 +; RV64IA-TSO-NOZACAS-NEXT: lr.w a4, (a2) +; RV64IA-TSO-NOZACAS-NEXT: and a7, a4, a3 +; RV64IA-TSO-NOZACAS-NEXT: mv a6, a4 +; RV64IA-TSO-NOZACAS-NEXT: sll a7, a7, a5 +; RV64IA-TSO-NOZACAS-NEXT: sra a7, a7, a5 ; RV64IA-TSO-NOZACAS-NEXT: bge a1, a7, .LBB117_3 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: xor a6, a5, a1 +; RV64IA-TSO-NOZACAS-NEXT: xor a6, a4, a1 ; RV64IA-TSO-NOZACAS-NEXT: and a6, a6, a3 -; RV64IA-TSO-NOZACAS-NEXT: xor a6, a5, a6 +; RV64IA-TSO-NOZACAS-NEXT: xor a6, a4, a6 ; RV64IA-TSO-NOZACAS-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1 ; RV64IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2) ; RV64IA-TSO-NOZACAS-NEXT: bnez a6, .LBB117_1 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.4: -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_min_i16_release: @@ -27571,30 +27571,30 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16 -; RV32IA-WMO-ZACAS-NEXT: li a4, 16 -; RV32IA-WMO-ZACAS-NEXT: andi a5, a0, 24 +; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 24 ; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 16 ; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 16 +; RV32IA-WMO-ZACAS-NEXT: li a5, 16 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: sub a4, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: sub a5, a5, a4 ; RV32IA-WMO-ZACAS-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w a5, (a2) -; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3 -; RV32IA-WMO-ZACAS-NEXT: mv a6, a5 -; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4 -; RV32IA-WMO-ZACAS-NEXT: 
sra a7, a7, a4 +; RV32IA-WMO-ZACAS-NEXT: lr.w a4, (a2) +; RV32IA-WMO-ZACAS-NEXT: and a7, a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a5 +; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a5 ; RV32IA-WMO-ZACAS-NEXT: bge a1, a7, .LBB117_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1 +; RV32IA-WMO-ZACAS-NEXT: xor a6, a4, a1 ; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3 -; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6 +; RV32IA-WMO-ZACAS-NEXT: xor a6, a4, a6 ; RV32IA-WMO-ZACAS-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1 ; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a6, a6, (a2) ; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB117_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.4: -; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0 +; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_min_i16_release: @@ -27602,30 +27602,30 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16 -; RV32IA-TSO-ZACAS-NEXT: li a4, 16 -; RV32IA-TSO-ZACAS-NEXT: andi a5, a0, 24 +; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 24 ; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 16 ; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 16 +; RV32IA-TSO-ZACAS-NEXT: li a5, 16 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: sub a4, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: sub a5, a5, a4 ; RV32IA-TSO-ZACAS-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2) -; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3 -; RV32IA-TSO-ZACAS-NEXT: mv a6, a5 -; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4 -; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4 +; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2) +; RV32IA-TSO-ZACAS-NEXT: and a7, a4, a3 +; 
RV32IA-TSO-ZACAS-NEXT: mv a6, a4 +; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a5 +; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a5 ; RV32IA-TSO-ZACAS-NEXT: bge a1, a7, .LBB117_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1 +; RV32IA-TSO-ZACAS-NEXT: xor a6, a4, a1 ; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3 -; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6 +; RV32IA-TSO-ZACAS-NEXT: xor a6, a4, a6 ; RV32IA-TSO-ZACAS-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1 ; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2) ; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB117_1 ; RV32IA-TSO-ZACAS-NEXT: # %bb.4: -; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0 +; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_min_i16_release: @@ -27633,30 +27633,30 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 48 -; RV64IA-WMO-ZACAS-NEXT: li a4, 48 -; RV64IA-WMO-ZACAS-NEXT: andi a5, a0, 24 +; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, 24 ; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: srai a1, a1, 48 ; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-ZACAS-NEXT: srai a1, a1, 48 +; RV64IA-WMO-ZACAS-NEXT: li a5, 48 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: sub a4, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: sub a5, a5, a4 ; RV64IA-WMO-ZACAS-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w a5, (a2) -; RV64IA-WMO-ZACAS-NEXT: and a7, a5, a3 -; RV64IA-WMO-ZACAS-NEXT: mv a6, a5 -; RV64IA-WMO-ZACAS-NEXT: sll a7, a7, a4 -; RV64IA-WMO-ZACAS-NEXT: sra a7, a7, a4 +; RV64IA-WMO-ZACAS-NEXT: lr.w a4, (a2) +; RV64IA-WMO-ZACAS-NEXT: and a7, a4, a3 +; RV64IA-WMO-ZACAS-NEXT: mv a6, a4 +; RV64IA-WMO-ZACAS-NEXT: sll a7, a7, a5 +; RV64IA-WMO-ZACAS-NEXT: sra a7, a7, a5 ; 
RV64IA-WMO-ZACAS-NEXT: bge a1, a7, .LBB117_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a6, a5, a1 +; RV64IA-WMO-ZACAS-NEXT: xor a6, a4, a1 ; RV64IA-WMO-ZACAS-NEXT: and a6, a6, a3 -; RV64IA-WMO-ZACAS-NEXT: xor a6, a5, a6 +; RV64IA-WMO-ZACAS-NEXT: xor a6, a4, a6 ; RV64IA-WMO-ZACAS-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1 ; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a6, a6, (a2) ; RV64IA-WMO-ZACAS-NEXT: bnez a6, .LBB117_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.4: -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_min_i16_release: @@ -27664,30 +27664,30 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 48 -; RV64IA-TSO-ZACAS-NEXT: li a4, 48 -; RV64IA-TSO-ZACAS-NEXT: andi a5, a0, 24 +; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, 24 ; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: srai a1, a1, 48 ; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-ZACAS-NEXT: srai a1, a1, 48 +; RV64IA-TSO-ZACAS-NEXT: li a5, 48 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: sub a4, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: sub a5, a5, a4 ; RV64IA-TSO-ZACAS-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a5, (a2) -; RV64IA-TSO-ZACAS-NEXT: and a7, a5, a3 -; RV64IA-TSO-ZACAS-NEXT: mv a6, a5 -; RV64IA-TSO-ZACAS-NEXT: sll a7, a7, a4 -; RV64IA-TSO-ZACAS-NEXT: sra a7, a7, a4 +; RV64IA-TSO-ZACAS-NEXT: lr.w a4, (a2) +; RV64IA-TSO-ZACAS-NEXT: and a7, a4, a3 +; RV64IA-TSO-ZACAS-NEXT: mv a6, a4 +; RV64IA-TSO-ZACAS-NEXT: sll a7, a7, a5 +; RV64IA-TSO-ZACAS-NEXT: sra a7, a7, a5 ; RV64IA-TSO-ZACAS-NEXT: bge a1, a7, .LBB117_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB117_1 
Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a6, a5, a1 +; RV64IA-TSO-ZACAS-NEXT: xor a6, a4, a1 ; RV64IA-TSO-ZACAS-NEXT: and a6, a6, a3 -; RV64IA-TSO-ZACAS-NEXT: xor a6, a5, a6 +; RV64IA-TSO-ZACAS-NEXT: xor a6, a4, a6 ; RV64IA-TSO-ZACAS-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1 ; RV64IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2) ; RV64IA-TSO-ZACAS-NEXT: bnez a6, .LBB117_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.4: -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_release: @@ -27761,30 +27761,30 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-ZALRSC-NEXT: andi a2, a0, -4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: slli a1, a1, 16 -; RV32I-ZALRSC-NEXT: li a4, 16 -; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 ; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: srai a1, a1, 16 ; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a5, 16 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 -; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: sub a5, a5, a4 ; RV32I-ZALRSC-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a5, (a2) -; RV32I-ZALRSC-NEXT: and a7, a5, a3 -; RV32I-ZALRSC-NEXT: mv a6, a5 -; RV32I-ZALRSC-NEXT: sll a7, a7, a4 -; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV32I-ZALRSC-NEXT: and a7, a4, a3 +; RV32I-ZALRSC-NEXT: mv a6, a4 +; RV32I-ZALRSC-NEXT: sll a7, a7, a5 +; RV32I-ZALRSC-NEXT: sra a7, a7, a5 ; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB118_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: xor a6, a4, a1 ; RV32I-ZALRSC-NEXT: and a6, a6, a3 -; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: xor a6, a4, a6 ; RV32I-ZALRSC-NEXT: .LBB118_3: # 
in Loop: Header=BB118_1 Depth=1 ; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) ; RV32I-ZALRSC-NEXT: bnez a6, .LBB118_1 ; RV32I-ZALRSC-NEXT: # %bb.4: -; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acq_rel: @@ -27792,30 +27792,30 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16 -; RV32IA-WMO-NOZACAS-NEXT: li a4, 16 -; RV32IA-WMO-NOZACAS-NEXT: andi a5, a0, 24 +; RV32IA-WMO-NOZACAS-NEXT: andi a4, a0, 24 ; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 16 ; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 16 +; RV32IA-WMO-NOZACAS-NEXT: li a5, 16 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-NOZACAS-NEXT: sub a4, a4, a5 +; RV32IA-WMO-NOZACAS-NEXT: sub a5, a5, a4 ; RV32IA-WMO-NOZACAS-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a5, (a2) -; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3 -; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5 -; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4 -; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4 +; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2) +; RV32IA-WMO-NOZACAS-NEXT: and a7, a4, a3 +; RV32IA-WMO-NOZACAS-NEXT: mv a6, a4 +; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a5 +; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a5 ; RV32IA-WMO-NOZACAS-NEXT: bge a1, a7, .LBB118_3 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1 +; RV32IA-WMO-NOZACAS-NEXT: xor a6, a4, a1 ; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3 -; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6 +; RV32IA-WMO-NOZACAS-NEXT: xor a6, a4, a6 ; RV32IA-WMO-NOZACAS-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1 ; RV32IA-WMO-NOZACAS-NEXT: 
sc.w.rl a6, a6, (a2) ; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB118_1 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.4: -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0 +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_min_i16_acq_rel: @@ -27823,30 +27823,30 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16 -; RV32IA-TSO-NOZACAS-NEXT: li a4, 16 -; RV32IA-TSO-NOZACAS-NEXT: andi a5, a0, 24 +; RV32IA-TSO-NOZACAS-NEXT: andi a4, a0, 24 ; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 16 ; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 16 +; RV32IA-TSO-NOZACAS-NEXT: li a5, 16 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-NOZACAS-NEXT: sub a4, a4, a5 +; RV32IA-TSO-NOZACAS-NEXT: sub a5, a5, a4 ; RV32IA-TSO-NOZACAS-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2) -; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3 -; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5 -; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4 -; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4 +; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2) +; RV32IA-TSO-NOZACAS-NEXT: and a7, a4, a3 +; RV32IA-TSO-NOZACAS-NEXT: mv a6, a4 +; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a5 +; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a5 ; RV32IA-TSO-NOZACAS-NEXT: bge a1, a7, .LBB118_3 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1 +; RV32IA-TSO-NOZACAS-NEXT: xor a6, a4, a1 ; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3 -; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6 +; RV32IA-TSO-NOZACAS-NEXT: xor a6, a4, a6 ; RV32IA-TSO-NOZACAS-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1 ; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2) ; 
RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB118_1 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.4: -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0 +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_min_i16_acq_rel: @@ -27896,30 +27896,30 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-ZALRSC-NEXT: andi a2, a0, -4 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: slli a1, a1, 48 -; RV64I-ZALRSC-NEXT: li a4, 48 -; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 ; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: srai a1, a1, 48 ; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a5, 48 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 -; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: sub a5, a5, a4 ; RV64I-ZALRSC-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a5, (a2) -; RV64I-ZALRSC-NEXT: and a7, a5, a3 -; RV64I-ZALRSC-NEXT: mv a6, a5 -; RV64I-ZALRSC-NEXT: sll a7, a7, a4 -; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a2) +; RV64I-ZALRSC-NEXT: and a7, a4, a3 +; RV64I-ZALRSC-NEXT: mv a6, a4 +; RV64I-ZALRSC-NEXT: sll a7, a7, a5 +; RV64I-ZALRSC-NEXT: sra a7, a7, a5 ; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB118_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: xor a6, a4, a1 ; RV64I-ZALRSC-NEXT: and a6, a6, a3 -; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: xor a6, a4, a6 ; RV64I-ZALRSC-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1 ; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) ; RV64I-ZALRSC-NEXT: bnez a6, .LBB118_1 ; RV64I-ZALRSC-NEXT: # %bb.4: -; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acq_rel: @@ -27927,30 +27927,30 @@ 
define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 48 -; RV64IA-WMO-NOZACAS-NEXT: li a4, 48 -; RV64IA-WMO-NOZACAS-NEXT: andi a5, a0, 24 +; RV64IA-WMO-NOZACAS-NEXT: andi a4, a0, 24 ; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: srai a1, a1, 48 ; RV64IA-WMO-NOZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-NOZACAS-NEXT: srai a1, a1, 48 +; RV64IA-WMO-NOZACAS-NEXT: li a5, 48 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-NOZACAS-NEXT: sub a4, a4, a5 +; RV64IA-WMO-NOZACAS-NEXT: sub a5, a5, a4 ; RV64IA-WMO-NOZACAS-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a5, (a2) -; RV64IA-WMO-NOZACAS-NEXT: and a7, a5, a3 -; RV64IA-WMO-NOZACAS-NEXT: mv a6, a5 -; RV64IA-WMO-NOZACAS-NEXT: sll a7, a7, a4 -; RV64IA-WMO-NOZACAS-NEXT: sra a7, a7, a4 +; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2) +; RV64IA-WMO-NOZACAS-NEXT: and a7, a4, a3 +; RV64IA-WMO-NOZACAS-NEXT: mv a6, a4 +; RV64IA-WMO-NOZACAS-NEXT: sll a7, a7, a5 +; RV64IA-WMO-NOZACAS-NEXT: sra a7, a7, a5 ; RV64IA-WMO-NOZACAS-NEXT: bge a1, a7, .LBB118_3 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: xor a6, a5, a1 +; RV64IA-WMO-NOZACAS-NEXT: xor a6, a4, a1 ; RV64IA-WMO-NOZACAS-NEXT: and a6, a6, a3 -; RV64IA-WMO-NOZACAS-NEXT: xor a6, a5, a6 +; RV64IA-WMO-NOZACAS-NEXT: xor a6, a4, a6 ; RV64IA-WMO-NOZACAS-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1 ; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a6, a6, (a2) ; RV64IA-WMO-NOZACAS-NEXT: bnez a6, .LBB118_1 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.4: -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_min_i16_acq_rel: @@ -27958,30 +27958,30 @@ define i16 
@atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 48 -; RV64IA-TSO-NOZACAS-NEXT: li a4, 48 -; RV64IA-TSO-NOZACAS-NEXT: andi a5, a0, 24 +; RV64IA-TSO-NOZACAS-NEXT: andi a4, a0, 24 ; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: srai a1, a1, 48 ; RV64IA-TSO-NOZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-NOZACAS-NEXT: srai a1, a1, 48 +; RV64IA-TSO-NOZACAS-NEXT: li a5, 48 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-NOZACAS-NEXT: sub a4, a4, a5 +; RV64IA-TSO-NOZACAS-NEXT: sub a5, a5, a4 ; RV64IA-TSO-NOZACAS-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: lr.w a5, (a2) -; RV64IA-TSO-NOZACAS-NEXT: and a7, a5, a3 -; RV64IA-TSO-NOZACAS-NEXT: mv a6, a5 -; RV64IA-TSO-NOZACAS-NEXT: sll a7, a7, a4 -; RV64IA-TSO-NOZACAS-NEXT: sra a7, a7, a4 +; RV64IA-TSO-NOZACAS-NEXT: lr.w a4, (a2) +; RV64IA-TSO-NOZACAS-NEXT: and a7, a4, a3 +; RV64IA-TSO-NOZACAS-NEXT: mv a6, a4 +; RV64IA-TSO-NOZACAS-NEXT: sll a7, a7, a5 +; RV64IA-TSO-NOZACAS-NEXT: sra a7, a7, a5 ; RV64IA-TSO-NOZACAS-NEXT: bge a1, a7, .LBB118_3 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: xor a6, a5, a1 +; RV64IA-TSO-NOZACAS-NEXT: xor a6, a4, a1 ; RV64IA-TSO-NOZACAS-NEXT: and a6, a6, a3 -; RV64IA-TSO-NOZACAS-NEXT: xor a6, a5, a6 +; RV64IA-TSO-NOZACAS-NEXT: xor a6, a4, a6 ; RV64IA-TSO-NOZACAS-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1 ; RV64IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2) ; RV64IA-TSO-NOZACAS-NEXT: bnez a6, .LBB118_1 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.4: -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_min_i16_acq_rel: @@ -27989,30 +27989,30 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr 
%a, i16 %b) nounwind { ; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16 -; RV32IA-WMO-ZACAS-NEXT: li a4, 16 -; RV32IA-WMO-ZACAS-NEXT: andi a5, a0, 24 +; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 24 ; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 16 ; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 16 +; RV32IA-WMO-ZACAS-NEXT: li a5, 16 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-WMO-ZACAS-NEXT: sub a4, a4, a5 +; RV32IA-WMO-ZACAS-NEXT: sub a5, a5, a4 ; RV32IA-WMO-ZACAS-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a5, (a2) -; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3 -; RV32IA-WMO-ZACAS-NEXT: mv a6, a5 -; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4 -; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2) +; RV32IA-WMO-ZACAS-NEXT: and a7, a4, a3 +; RV32IA-WMO-ZACAS-NEXT: mv a6, a4 +; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a5 +; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a5 ; RV32IA-WMO-ZACAS-NEXT: bge a1, a7, .LBB118_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1 +; RV32IA-WMO-ZACAS-NEXT: xor a6, a4, a1 ; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3 -; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6 +; RV32IA-WMO-ZACAS-NEXT: xor a6, a4, a6 ; RV32IA-WMO-ZACAS-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1 ; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a6, a6, (a2) ; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB118_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.4: -; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0 +; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_min_i16_acq_rel: @@ -28020,30 +28020,30 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; 
RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16 -; RV32IA-TSO-ZACAS-NEXT: li a4, 16 -; RV32IA-TSO-ZACAS-NEXT: andi a5, a0, 24 +; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 24 ; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 16 ; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 16 +; RV32IA-TSO-ZACAS-NEXT: li a5, 16 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-TSO-ZACAS-NEXT: sub a4, a4, a5 +; RV32IA-TSO-ZACAS-NEXT: sub a5, a5, a4 ; RV32IA-TSO-ZACAS-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2) -; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3 -; RV32IA-TSO-ZACAS-NEXT: mv a6, a5 -; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4 -; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4 +; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2) +; RV32IA-TSO-ZACAS-NEXT: and a7, a4, a3 +; RV32IA-TSO-ZACAS-NEXT: mv a6, a4 +; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a5 +; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a5 ; RV32IA-TSO-ZACAS-NEXT: bge a1, a7, .LBB118_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1 +; RV32IA-TSO-ZACAS-NEXT: xor a6, a4, a1 ; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3 -; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6 +; RV32IA-TSO-ZACAS-NEXT: xor a6, a4, a6 ; RV32IA-TSO-ZACAS-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1 ; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2) ; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB118_1 ; RV32IA-TSO-ZACAS-NEXT: # %bb.4: -; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0 +; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_min_i16_acq_rel: @@ -28051,30 +28051,30 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 48 -; RV64IA-WMO-ZACAS-NEXT: li a4, 48 -; 
RV64IA-WMO-ZACAS-NEXT: andi a5, a0, 24 +; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, 24 ; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: srai a1, a1, 48 ; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-WMO-ZACAS-NEXT: srai a1, a1, 48 +; RV64IA-WMO-ZACAS-NEXT: li a5, 48 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-WMO-ZACAS-NEXT: sub a4, a4, a5 +; RV64IA-WMO-ZACAS-NEXT: sub a5, a5, a4 ; RV64IA-WMO-ZACAS-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a5, (a2) -; RV64IA-WMO-ZACAS-NEXT: and a7, a5, a3 -; RV64IA-WMO-ZACAS-NEXT: mv a6, a5 -; RV64IA-WMO-ZACAS-NEXT: sll a7, a7, a4 -; RV64IA-WMO-ZACAS-NEXT: sra a7, a7, a4 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2) +; RV64IA-WMO-ZACAS-NEXT: and a7, a4, a3 +; RV64IA-WMO-ZACAS-NEXT: mv a6, a4 +; RV64IA-WMO-ZACAS-NEXT: sll a7, a7, a5 +; RV64IA-WMO-ZACAS-NEXT: sra a7, a7, a5 ; RV64IA-WMO-ZACAS-NEXT: bge a1, a7, .LBB118_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a6, a5, a1 +; RV64IA-WMO-ZACAS-NEXT: xor a6, a4, a1 ; RV64IA-WMO-ZACAS-NEXT: and a6, a6, a3 -; RV64IA-WMO-ZACAS-NEXT: xor a6, a5, a6 +; RV64IA-WMO-ZACAS-NEXT: xor a6, a4, a6 ; RV64IA-WMO-ZACAS-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1 ; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a6, a6, (a2) ; RV64IA-WMO-ZACAS-NEXT: bnez a6, .LBB118_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.4: -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_min_i16_acq_rel: @@ -28082,30 +28082,30 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 48 -; RV64IA-TSO-ZACAS-NEXT: li a4, 48 -; RV64IA-TSO-ZACAS-NEXT: andi a5, a0, 24 +; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, 24 ; RV64IA-TSO-ZACAS-NEXT: 
addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: srai a1, a1, 48 ; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-TSO-ZACAS-NEXT: srai a1, a1, 48 +; RV64IA-TSO-ZACAS-NEXT: li a5, 48 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-TSO-ZACAS-NEXT: sub a4, a4, a5 +; RV64IA-TSO-ZACAS-NEXT: sub a5, a5, a4 ; RV64IA-TSO-ZACAS-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a5, (a2) -; RV64IA-TSO-ZACAS-NEXT: and a7, a5, a3 -; RV64IA-TSO-ZACAS-NEXT: mv a6, a5 -; RV64IA-TSO-ZACAS-NEXT: sll a7, a7, a4 -; RV64IA-TSO-ZACAS-NEXT: sra a7, a7, a4 +; RV64IA-TSO-ZACAS-NEXT: lr.w a4, (a2) +; RV64IA-TSO-ZACAS-NEXT: and a7, a4, a3 +; RV64IA-TSO-ZACAS-NEXT: mv a6, a4 +; RV64IA-TSO-ZACAS-NEXT: sll a7, a7, a5 +; RV64IA-TSO-ZACAS-NEXT: sra a7, a7, a5 ; RV64IA-TSO-ZACAS-NEXT: bge a1, a7, .LBB118_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a6, a5, a1 +; RV64IA-TSO-ZACAS-NEXT: xor a6, a4, a1 ; RV64IA-TSO-ZACAS-NEXT: and a6, a6, a3 -; RV64IA-TSO-ZACAS-NEXT: xor a6, a5, a6 +; RV64IA-TSO-ZACAS-NEXT: xor a6, a4, a6 ; RV64IA-TSO-ZACAS-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1 ; RV64IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2) ; RV64IA-TSO-ZACAS-NEXT: bnez a6, .LBB118_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.4: -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_acq_rel: @@ -28179,30 +28179,30 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-ZALRSC-NEXT: andi a2, a0, -4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: slli a1, a1, 16 -; RV32I-ZALRSC-NEXT: li a4, 16 -; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 ; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: srai a1, a1, 16 ; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; 
RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a5, 16 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 -; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: sub a5, a5, a4 ; RV32I-ZALRSC-NEXT: .LBB119_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) -; RV32I-ZALRSC-NEXT: and a7, a5, a3 -; RV32I-ZALRSC-NEXT: mv a6, a5 -; RV32I-ZALRSC-NEXT: sll a7, a7, a4 -; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a2) +; RV32I-ZALRSC-NEXT: and a7, a4, a3 +; RV32I-ZALRSC-NEXT: mv a6, a4 +; RV32I-ZALRSC-NEXT: sll a7, a7, a5 +; RV32I-ZALRSC-NEXT: sra a7, a7, a5 ; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB119_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB119_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: xor a6, a4, a1 ; RV32I-ZALRSC-NEXT: and a6, a6, a3 -; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: xor a6, a4, a6 ; RV32I-ZALRSC-NEXT: .LBB119_3: # in Loop: Header=BB119_1 Depth=1 ; RV32I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) ; RV32I-ZALRSC-NEXT: bnez a6, .LBB119_1 ; RV32I-ZALRSC-NEXT: # %bb.4: -; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: atomicrmw_min_i16_seq_cst: @@ -28210,30 +28210,30 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 ; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 ; RV32IA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-NOZACAS-NEXT: slli a1, a1, 16 -; RV32IA-NOZACAS-NEXT: li a4, 16 -; RV32IA-NOZACAS-NEXT: andi a5, a0, 24 +; RV32IA-NOZACAS-NEXT: andi a4, a0, 24 ; RV32IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-NOZACAS-NEXT: srai a1, a1, 16 ; RV32IA-NOZACAS-NEXT: sll a3, a3, a0 +; RV32IA-NOZACAS-NEXT: slli a1, a1, 16 +; RV32IA-NOZACAS-NEXT: srai a1, a1, 16 +; RV32IA-NOZACAS-NEXT: li a5, 16 ; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 -; RV32IA-NOZACAS-NEXT: sub a4, a4, a5 +; RV32IA-NOZACAS-NEXT: sub a5, a5, a4 ; RV32IA-NOZACAS-NEXT: .LBB119_1: # =>This Inner Loop Header: 
Depth=1 -; RV32IA-NOZACAS-NEXT: lr.w.aqrl a5, (a2) -; RV32IA-NOZACAS-NEXT: and a7, a5, a3 -; RV32IA-NOZACAS-NEXT: mv a6, a5 -; RV32IA-NOZACAS-NEXT: sll a7, a7, a4 -; RV32IA-NOZACAS-NEXT: sra a7, a7, a4 +; RV32IA-NOZACAS-NEXT: lr.w.aqrl a4, (a2) +; RV32IA-NOZACAS-NEXT: and a7, a4, a3 +; RV32IA-NOZACAS-NEXT: mv a6, a4 +; RV32IA-NOZACAS-NEXT: sll a7, a7, a5 +; RV32IA-NOZACAS-NEXT: sra a7, a7, a5 ; RV32IA-NOZACAS-NEXT: bge a1, a7, .LBB119_3 ; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB119_1 Depth=1 -; RV32IA-NOZACAS-NEXT: xor a6, a5, a1 +; RV32IA-NOZACAS-NEXT: xor a6, a4, a1 ; RV32IA-NOZACAS-NEXT: and a6, a6, a3 -; RV32IA-NOZACAS-NEXT: xor a6, a5, a6 +; RV32IA-NOZACAS-NEXT: xor a6, a4, a6 ; RV32IA-NOZACAS-NEXT: .LBB119_3: # in Loop: Header=BB119_1 Depth=1 ; RV32IA-NOZACAS-NEXT: sc.w.rl a6, a6, (a2) ; RV32IA-NOZACAS-NEXT: bnez a6, .LBB119_1 ; RV32IA-NOZACAS-NEXT: # %bb.4: -; RV32IA-NOZACAS-NEXT: srl a0, a5, a0 +; RV32IA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_min_i16_seq_cst: @@ -28283,30 +28283,30 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-ZALRSC-NEXT: andi a2, a0, -4 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: slli a1, a1, 48 -; RV64I-ZALRSC-NEXT: li a4, 48 -; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 ; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: srai a1, a1, 48 ; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a5, 48 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 -; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: sub a5, a5, a4 ; RV64I-ZALRSC-NEXT: .LBB119_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aqrl a5, (a2) -; RV64I-ZALRSC-NEXT: and a7, a5, a3 -; RV64I-ZALRSC-NEXT: mv a6, a5 -; RV64I-ZALRSC-NEXT: sll a7, a7, a4 -; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, 
(a2) +; RV64I-ZALRSC-NEXT: and a7, a4, a3 +; RV64I-ZALRSC-NEXT: mv a6, a4 +; RV64I-ZALRSC-NEXT: sll a7, a7, a5 +; RV64I-ZALRSC-NEXT: sra a7, a7, a5 ; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB119_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB119_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: xor a6, a4, a1 ; RV64I-ZALRSC-NEXT: and a6, a6, a3 -; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: xor a6, a4, a6 ; RV64I-ZALRSC-NEXT: .LBB119_3: # in Loop: Header=BB119_1 Depth=1 ; RV64I-ZALRSC-NEXT: sc.w.rl a6, a6, (a2) ; RV64I-ZALRSC-NEXT: bnez a6, .LBB119_1 ; RV64I-ZALRSC-NEXT: # %bb.4: -; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_min_i16_seq_cst: @@ -28314,30 +28314,30 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 ; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 ; RV64IA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-NOZACAS-NEXT: slli a1, a1, 48 -; RV64IA-NOZACAS-NEXT: li a4, 48 -; RV64IA-NOZACAS-NEXT: andi a5, a0, 24 +; RV64IA-NOZACAS-NEXT: andi a4, a0, 24 ; RV64IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-NOZACAS-NEXT: srai a1, a1, 48 ; RV64IA-NOZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-NOZACAS-NEXT: slli a1, a1, 48 +; RV64IA-NOZACAS-NEXT: srai a1, a1, 48 +; RV64IA-NOZACAS-NEXT: li a5, 48 ; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-NOZACAS-NEXT: sub a4, a4, a5 +; RV64IA-NOZACAS-NEXT: sub a5, a5, a4 ; RV64IA-NOZACAS-NEXT: .LBB119_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NOZACAS-NEXT: lr.w.aqrl a5, (a2) -; RV64IA-NOZACAS-NEXT: and a7, a5, a3 -; RV64IA-NOZACAS-NEXT: mv a6, a5 -; RV64IA-NOZACAS-NEXT: sll a7, a7, a4 -; RV64IA-NOZACAS-NEXT: sra a7, a7, a4 +; RV64IA-NOZACAS-NEXT: lr.w.aqrl a4, (a2) +; RV64IA-NOZACAS-NEXT: and a7, a4, a3 +; RV64IA-NOZACAS-NEXT: mv a6, a4 +; RV64IA-NOZACAS-NEXT: sll a7, a7, a5 +; RV64IA-NOZACAS-NEXT: sra a7, a7, a5 ; RV64IA-NOZACAS-NEXT: bge a1, a7, .LBB119_3 ; 
RV64IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB119_1 Depth=1 -; RV64IA-NOZACAS-NEXT: xor a6, a5, a1 +; RV64IA-NOZACAS-NEXT: xor a6, a4, a1 ; RV64IA-NOZACAS-NEXT: and a6, a6, a3 -; RV64IA-NOZACAS-NEXT: xor a6, a5, a6 +; RV64IA-NOZACAS-NEXT: xor a6, a4, a6 ; RV64IA-NOZACAS-NEXT: .LBB119_3: # in Loop: Header=BB119_1 Depth=1 ; RV64IA-NOZACAS-NEXT: sc.w.rl a6, a6, (a2) ; RV64IA-NOZACAS-NEXT: bnez a6, .LBB119_1 ; RV64IA-NOZACAS-NEXT: # %bb.4: -; RV64IA-NOZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_min_i16_seq_cst: @@ -28345,30 +28345,30 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32IA-ZACAS-NEXT: andi a2, a0, -4 ; RV32IA-ZACAS-NEXT: slli a0, a0, 3 ; RV32IA-ZACAS-NEXT: lui a3, 16 -; RV32IA-ZACAS-NEXT: slli a1, a1, 16 -; RV32IA-ZACAS-NEXT: li a4, 16 -; RV32IA-ZACAS-NEXT: andi a5, a0, 24 +; RV32IA-ZACAS-NEXT: andi a4, a0, 24 ; RV32IA-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-ZACAS-NEXT: srai a1, a1, 16 ; RV32IA-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-ZACAS-NEXT: slli a1, a1, 16 +; RV32IA-ZACAS-NEXT: srai a1, a1, 16 +; RV32IA-ZACAS-NEXT: li a5, 16 ; RV32IA-ZACAS-NEXT: sll a1, a1, a0 -; RV32IA-ZACAS-NEXT: sub a4, a4, a5 +; RV32IA-ZACAS-NEXT: sub a5, a5, a4 ; RV32IA-ZACAS-NEXT: .LBB119_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-ZACAS-NEXT: lr.w.aqrl a5, (a2) -; RV32IA-ZACAS-NEXT: and a7, a5, a3 -; RV32IA-ZACAS-NEXT: mv a6, a5 -; RV32IA-ZACAS-NEXT: sll a7, a7, a4 -; RV32IA-ZACAS-NEXT: sra a7, a7, a4 +; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a2) +; RV32IA-ZACAS-NEXT: and a7, a4, a3 +; RV32IA-ZACAS-NEXT: mv a6, a4 +; RV32IA-ZACAS-NEXT: sll a7, a7, a5 +; RV32IA-ZACAS-NEXT: sra a7, a7, a5 ; RV32IA-ZACAS-NEXT: bge a1, a7, .LBB119_3 ; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB119_1 Depth=1 -; RV32IA-ZACAS-NEXT: xor a6, a5, a1 +; RV32IA-ZACAS-NEXT: xor a6, a4, a1 ; RV32IA-ZACAS-NEXT: and a6, a6, a3 -; RV32IA-ZACAS-NEXT: xor a6, a5, a6 +; RV32IA-ZACAS-NEXT: xor a6, a4, a6 
; RV32IA-ZACAS-NEXT: .LBB119_3: # in Loop: Header=BB119_1 Depth=1 ; RV32IA-ZACAS-NEXT: sc.w.rl a6, a6, (a2) ; RV32IA-ZACAS-NEXT: bnez a6, .LBB119_1 ; RV32IA-ZACAS-NEXT: # %bb.4: -; RV32IA-ZACAS-NEXT: srl a0, a5, a0 +; RV32IA-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_min_i16_seq_cst: @@ -28376,30 +28376,30 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64IA-ZACAS-NEXT: andi a2, a0, -4 ; RV64IA-ZACAS-NEXT: slli a0, a0, 3 ; RV64IA-ZACAS-NEXT: lui a3, 16 -; RV64IA-ZACAS-NEXT: slli a1, a1, 48 -; RV64IA-ZACAS-NEXT: li a4, 48 -; RV64IA-ZACAS-NEXT: andi a5, a0, 24 +; RV64IA-ZACAS-NEXT: andi a4, a0, 24 ; RV64IA-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-ZACAS-NEXT: srai a1, a1, 48 ; RV64IA-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-ZACAS-NEXT: slli a1, a1, 48 +; RV64IA-ZACAS-NEXT: srai a1, a1, 48 +; RV64IA-ZACAS-NEXT: li a5, 48 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 -; RV64IA-ZACAS-NEXT: sub a4, a4, a5 +; RV64IA-ZACAS-NEXT: sub a5, a5, a4 ; RV64IA-ZACAS-NEXT: .LBB119_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w.aqrl a5, (a2) -; RV64IA-ZACAS-NEXT: and a7, a5, a3 -; RV64IA-ZACAS-NEXT: mv a6, a5 -; RV64IA-ZACAS-NEXT: sll a7, a7, a4 -; RV64IA-ZACAS-NEXT: sra a7, a7, a4 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a4, (a2) +; RV64IA-ZACAS-NEXT: and a7, a4, a3 +; RV64IA-ZACAS-NEXT: mv a6, a4 +; RV64IA-ZACAS-NEXT: sll a7, a7, a5 +; RV64IA-ZACAS-NEXT: sra a7, a7, a5 ; RV64IA-ZACAS-NEXT: bge a1, a7, .LBB119_3 ; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB119_1 Depth=1 -; RV64IA-ZACAS-NEXT: xor a6, a5, a1 +; RV64IA-ZACAS-NEXT: xor a6, a4, a1 ; RV64IA-ZACAS-NEXT: and a6, a6, a3 -; RV64IA-ZACAS-NEXT: xor a6, a5, a6 +; RV64IA-ZACAS-NEXT: xor a6, a4, a6 ; RV64IA-ZACAS-NEXT: .LBB119_3: # in Loop: Header=BB119_1 Depth=1 ; RV64IA-ZACAS-NEXT: sc.w.rl a6, a6, (a2) ; RV64IA-ZACAS-NEXT: bnez a6, .LBB119_1 ; RV64IA-ZACAS-NEXT: # %bb.4: -; RV64IA-ZACAS-NEXT: srlw a0, a5, a0 +; RV64IA-ZACAS-NEXT: srlw a0, a4, a0 ; 
RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_seq_cst: @@ -28434,34 +28434,34 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lhu a3, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: j .LBB120_2 ; RV32I-NEXT: .LBB120_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB120_2 Depth=1 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: sh a3, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: lh a3, 10(sp) ; RV32I-NEXT: bnez a0, .LBB120_4 ; RV32I-NEXT: .LBB120_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: and a0, a3, s2 +; RV32I-NEXT: mv a2, a3 ; RV32I-NEXT: bltu s3, a0, .LBB120_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB120_2 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB120_1 ; RV32I-NEXT: .LBB120_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28472,52 +28472,52 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, 
a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) -; RV32I-ZALRSC-NEXT: and a6, a3, a4 -; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: lr.w a4, (a3) +; RV32I-ZALRSC-NEXT: and a6, a4, a2 +; RV32I-ZALRSC-NEXT: mv a5, a4 ; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB120_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB120_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a1 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 ; RV32I-ZALRSC-NEXT: .LBB120_3: # in Loop: Header=BB120_1 Depth=1 -; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB120_1 ; RV32I-ZALRSC-NEXT: # %bb.4: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: -; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-NOZACAS-NEXT: lui a2, 16 +; RV32IA-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-NOZACAS-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-NOZACAS-NEXT: and a6, a3, a4 -; RV32IA-NOZACAS-NEXT: mv a5, a3 +; RV32IA-NOZACAS-NEXT: lr.w a4, (a3) +; 
RV32IA-NOZACAS-NEXT: and a6, a4, a2 +; RV32IA-NOZACAS-NEXT: mv a5, a4 ; RV32IA-NOZACAS-NEXT: bgeu a6, a1, .LBB120_3 ; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB120_1 Depth=1 -; RV32IA-NOZACAS-NEXT: xor a5, a3, a1 -; RV32IA-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-NOZACAS-NEXT: xor a5, a3, a5 +; RV32IA-NOZACAS-NEXT: xor a5, a4, a1 +; RV32IA-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-NOZACAS-NEXT: xor a5, a4, a5 ; RV32IA-NOZACAS-NEXT: .LBB120_3: # in Loop: Header=BB120_1 Depth=1 -; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-NOZACAS-NEXT: bnez a5, .LBB120_1 ; RV32IA-NOZACAS-NEXT: # %bb.4: -; RV32IA-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umax_i16_monotonic: @@ -28528,34 +28528,34 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lhu a3, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addi s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: j .LBB120_2 ; RV64I-NEXT: .LBB120_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB120_2 Depth=1 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: sh a3, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: lh a3, 6(sp) ; RV64I-NEXT: bnez a0, .LBB120_4 ; RV64I-NEXT: .LBB120_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: and a0, a3, s2 +; RV64I-NEXT: mv a2, a3 ; RV64I-NEXT: bltu s3, a0, .LBB120_1 ; RV64I-NEXT: 
# %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB120_2 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB120_1 ; RV64I-NEXT: .LBB120_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -28566,102 +28566,102 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) -; RV64I-ZALRSC-NEXT: and a6, a3, a4 -; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: lr.w a4, (a3) +; RV64I-ZALRSC-NEXT: and a6, a4, a2 +; RV64I-ZALRSC-NEXT: mv a5, a4 ; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB120_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB120_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a1 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 ; RV64I-ZALRSC-NEXT: .LBB120_3: # in Loop: Header=BB120_1 Depth=1 -; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB120_1 ; RV64I-ZALRSC-NEXT: # %bb.4: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_umax_i16_monotonic: ; 
RV64IA-NOZACAS: # %bb.0: -; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-NOZACAS-NEXT: lui a2, 16 +; RV64IA-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-NOZACAS-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-NOZACAS-NEXT: and a6, a3, a4 -; RV64IA-NOZACAS-NEXT: mv a5, a3 +; RV64IA-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-NOZACAS-NEXT: and a6, a4, a2 +; RV64IA-NOZACAS-NEXT: mv a5, a4 ; RV64IA-NOZACAS-NEXT: bgeu a6, a1, .LBB120_3 ; RV64IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB120_1 Depth=1 -; RV64IA-NOZACAS-NEXT: xor a5, a3, a1 -; RV64IA-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-NOZACAS-NEXT: xor a5, a3, a5 +; RV64IA-NOZACAS-NEXT: xor a5, a4, a1 +; RV64IA-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-NOZACAS-NEXT: xor a5, a4, a5 ; RV64IA-NOZACAS-NEXT: .LBB120_3: # in Loop: Header=BB120_1 Depth=1 -; RV64IA-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-NOZACAS-NEXT: bnez a5, .LBB120_1 ; RV64IA-NOZACAS-NEXT: # %bb.4: -; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_umax_i16_monotonic: ; RV32IA-ZACAS: # %bb.0: -; RV32IA-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-ZACAS-NEXT: lui a2, 16 +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-ZACAS-NEXT: lui a3, 16 -; RV32IA-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-ZACAS-NEXT: 
.LBB120_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-ZACAS-NEXT: lr.w a3, (a2) -; RV32IA-ZACAS-NEXT: and a6, a3, a4 -; RV32IA-ZACAS-NEXT: mv a5, a3 +; RV32IA-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-ZACAS-NEXT: and a6, a4, a2 +; RV32IA-ZACAS-NEXT: mv a5, a4 ; RV32IA-ZACAS-NEXT: bgeu a6, a1, .LBB120_3 ; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB120_1 Depth=1 -; RV32IA-ZACAS-NEXT: xor a5, a3, a1 -; RV32IA-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-ZACAS-NEXT: xor a5, a4, a1 +; RV32IA-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-ZACAS-NEXT: xor a5, a4, a5 ; RV32IA-ZACAS-NEXT: .LBB120_3: # in Loop: Header=BB120_1 Depth=1 -; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-ZACAS-NEXT: bnez a5, .LBB120_1 ; RV32IA-ZACAS-NEXT: # %bb.4: -; RV32IA-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_umax_i16_monotonic: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-ZACAS-NEXT: lui a2, 16 +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-ZACAS-NEXT: lui a3, 16 -; RV64IA-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-ZACAS-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w a3, (a2) -; RV64IA-ZACAS-NEXT: and a6, a3, a4 -; RV64IA-ZACAS-NEXT: mv a5, a3 +; RV64IA-ZACAS-NEXT: lr.w a4, (a3) +; RV64IA-ZACAS-NEXT: and a6, a4, a2 +; RV64IA-ZACAS-NEXT: mv a5, a4 ; RV64IA-ZACAS-NEXT: bgeu a6, a1, .LBB120_3 ; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB120_1 Depth=1 -; RV64IA-ZACAS-NEXT: xor a5, a3, a1 -; RV64IA-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-ZACAS-NEXT: xor a5, a4, a1 +; RV64IA-ZACAS-NEXT: and a5, a5, a2 +; 
RV64IA-ZACAS-NEXT: xor a5, a4, a5 ; RV64IA-ZACAS-NEXT: .LBB120_3: # in Loop: Header=BB120_1 Depth=1 -; RV64IA-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-ZACAS-NEXT: bnez a5, .LBB120_1 ; RV64IA-ZACAS-NEXT: # %bb.4: -; RV64IA-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_monotonic: @@ -28696,34 +28696,34 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lhu a3, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: j .LBB121_2 ; RV32I-NEXT: .LBB121_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB121_2 Depth=1 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: sh a3, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 2 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: lh a3, 10(sp) ; RV32I-NEXT: bnez a0, .LBB121_4 ; RV32I-NEXT: .LBB121_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: and a0, a3, s2 +; RV32I-NEXT: mv a2, a3 ; RV32I-NEXT: bltu s3, a0, .LBB121_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB121_2 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB121_1 ; RV32I-NEXT: .LBB121_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 
4-byte Folded Reload @@ -28734,77 +28734,77 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_acquire: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) -; RV32I-ZALRSC-NEXT: and a6, a3, a4 -; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a3) +; RV32I-ZALRSC-NEXT: and a6, a4, a2 +; RV32I-ZALRSC-NEXT: mv a5, a4 ; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB121_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a1 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 ; RV32I-ZALRSC-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1 -; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB121_1 ; RV32I-ZALRSC-NEXT: # %bb.4: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0 -; 
RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-NOZACAS-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2) -; RV32IA-WMO-NOZACAS-NEXT: and a6, a3, a4 -; RV32IA-WMO-NOZACAS-NEXT: mv a5, a3 +; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a3) +; RV32IA-WMO-NOZACAS-NEXT: and a6, a4, a2 +; RV32IA-WMO-NOZACAS-NEXT: mv a5, a4 ; RV32IA-WMO-NOZACAS-NEXT: bgeu a6, a1, .LBB121_3 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a1 -; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a1 +; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 ; RV32IA-WMO-NOZACAS-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB121_1 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.4: -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umax_i16_acquire: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-NOZACAS-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-NOZACAS-NEXT: and a6, a3, a4 -; 
RV32IA-TSO-NOZACAS-NEXT: mv a5, a3 +; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-NOZACAS-NEXT: and a6, a4, a2 +; RV32IA-TSO-NOZACAS-NEXT: mv a5, a4 ; RV32IA-TSO-NOZACAS-NEXT: bgeu a6, a1, .LBB121_3 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a1 -; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a1 +; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 ; RV32IA-TSO-NOZACAS-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB121_1 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.4: -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umax_i16_acquire: @@ -28815,34 +28815,34 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lhu a3, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addi s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: j .LBB121_2 ; RV64I-NEXT: .LBB121_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB121_2 Depth=1 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: sh a3, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 2 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: lh a3, 6(sp) ; RV64I-NEXT: bnez a0, .LBB121_4 ; RV64I-NEXT: .LBB121_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; 
RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: and a0, a3, s2 +; RV64I-NEXT: mv a2, a3 ; RV64I-NEXT: bltu s3, a0, .LBB121_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB121_2 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB121_1 ; RV64I-NEXT: .LBB121_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -28853,177 +28853,177 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_acquire: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) -; RV64I-ZALRSC-NEXT: and a6, a3, a4 -; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a3) +; RV64I-ZALRSC-NEXT: and a6, a4, a2 +; RV64I-ZALRSC-NEXT: mv a5, a4 ; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB121_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a1 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 ; RV64I-ZALRSC-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1 -; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB121_1 ; RV64I-ZALRSC-NEXT: # %bb.4: -; 
RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NOZACAS-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2) -; RV64IA-WMO-NOZACAS-NEXT: and a6, a3, a4 -; RV64IA-WMO-NOZACAS-NEXT: mv a5, a3 +; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a3) +; RV64IA-WMO-NOZACAS-NEXT: and a6, a4, a2 +; RV64IA-WMO-NOZACAS-NEXT: mv a5, a4 ; RV64IA-WMO-NOZACAS-NEXT: bgeu a6, a1, .LBB121_3 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a1 -; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a1 +; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 ; RV64IA-WMO-NOZACAS-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-WMO-NOZACAS-NEXT: bnez a5, .LBB121_1 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.4: -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_umax_i16_acquire: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; 
RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NOZACAS-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-NOZACAS-NEXT: and a6, a3, a4 -; RV64IA-TSO-NOZACAS-NEXT: mv a5, a3 +; RV64IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-NOZACAS-NEXT: and a6, a4, a2 +; RV64IA-TSO-NOZACAS-NEXT: mv a5, a4 ; RV64IA-TSO-NOZACAS-NEXT: bgeu a6, a1, .LBB121_3 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a1 -; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a1 +; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 ; RV64IA-TSO-NOZACAS-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-NOZACAS-NEXT: bnez a5, .LBB121_1 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.4: -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umax_i16_acquire: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 ; 
RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZACAS-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2) -; RV32IA-WMO-ZACAS-NEXT: and a6, a3, a4 -; RV32IA-WMO-ZACAS-NEXT: mv a5, a3 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a6, a4, a2 +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 ; RV32IA-WMO-ZACAS-NEXT: bgeu a6, a1, .LBB121_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a1 -; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a1 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 ; RV32IA-WMO-ZACAS-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB121_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.4: -; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umax_i16_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZACAS-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-ZACAS-NEXT: and a6, a3, a4 -; RV32IA-TSO-ZACAS-NEXT: mv a5, a3 +; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-ZACAS-NEXT: and a6, a4, a2 +; RV32IA-TSO-ZACAS-NEXT: mv a5, a4 ; RV32IA-TSO-ZACAS-NEXT: bgeu a6, a1, .LBB121_3 ; 
RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a1 -; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a1 +; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 ; RV32IA-TSO-ZACAS-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB121_1 ; RV32IA-TSO-ZACAS-NEXT: # %bb.4: -; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umax_i16_acquire: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZACAS-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2) -; RV64IA-WMO-ZACAS-NEXT: and a6, a3, a4 -; RV64IA-WMO-ZACAS-NEXT: mv a5, a3 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a6, a4, a2 +; RV64IA-WMO-ZACAS-NEXT: mv a5, a4 ; RV64IA-WMO-ZACAS-NEXT: bgeu a6, a1, .LBB121_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a1 -; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a1 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 ; RV64IA-WMO-ZACAS-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1 -; 
RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB121_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.4: -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_umax_i16_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZACAS-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-ZACAS-NEXT: and a6, a3, a4 -; RV64IA-TSO-ZACAS-NEXT: mv a5, a3 +; RV64IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-ZACAS-NEXT: and a6, a4, a2 +; RV64IA-TSO-ZACAS-NEXT: mv a5, a4 ; RV64IA-TSO-ZACAS-NEXT: bgeu a6, a1, .LBB121_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a1 -; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a1 +; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 ; RV64IA-TSO-ZACAS-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB121_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.4: -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_acquire: @@ -29058,34 +29058,34 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 
%b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lhu a3, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: j .LBB122_2 ; RV32I-NEXT: .LBB122_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB122_2 Depth=1 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: sh a3, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 3 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: lh a3, 10(sp) ; RV32I-NEXT: bnez a0, .LBB122_4 ; RV32I-NEXT: .LBB122_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: and a0, a3, s2 +; RV32I-NEXT: mv a2, a3 ; RV32I-NEXT: bltu s3, a0, .LBB122_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB122_2 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB122_1 ; RV32I-NEXT: .LBB122_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -29096,77 +29096,77 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_release: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; 
RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) -; RV32I-ZALRSC-NEXT: and a6, a3, a4 -; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: lr.w a4, (a3) +; RV32I-ZALRSC-NEXT: and a6, a4, a2 +; RV32I-ZALRSC-NEXT: mv a5, a4 ; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB122_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a1 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 ; RV32I-ZALRSC-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1 -; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB122_1 ; RV32I-ZALRSC-NEXT: # %bb.4: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-NOZACAS-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-WMO-NOZACAS-NEXT: and a6, a3, a4 -; RV32IA-WMO-NOZACAS-NEXT: mv a5, a3 +; RV32IA-WMO-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-WMO-NOZACAS-NEXT: and a6, a4, a2 +; RV32IA-WMO-NOZACAS-NEXT: mv a5, a4 ; 
RV32IA-WMO-NOZACAS-NEXT: bgeu a6, a1, .LBB122_3 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a1 -; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a1 +; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 ; RV32IA-WMO-NOZACAS-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB122_1 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.4: -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umax_i16_release: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-NOZACAS-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-NOZACAS-NEXT: and a6, a3, a4 -; RV32IA-TSO-NOZACAS-NEXT: mv a5, a3 +; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-NOZACAS-NEXT: and a6, a4, a2 +; RV32IA-TSO-NOZACAS-NEXT: mv a5, a4 ; RV32IA-TSO-NOZACAS-NEXT: bgeu a6, a1, .LBB122_3 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a1 -; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a1 +; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a2 
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 ; RV32IA-TSO-NOZACAS-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB122_1 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.4: -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umax_i16_release: @@ -29177,34 +29177,34 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lhu a3, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addi s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: j .LBB122_2 ; RV64I-NEXT: .LBB122_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB122_2 Depth=1 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: sh a3, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 3 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: lh a3, 6(sp) ; RV64I-NEXT: bnez a0, .LBB122_4 ; RV64I-NEXT: .LBB122_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: and a0, a3, s2 +; RV64I-NEXT: mv a2, a3 ; RV64I-NEXT: bltu s3, a0, .LBB122_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB122_2 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB122_1 ; RV64I-NEXT: .LBB122_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded 
Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -29215,177 +29215,177 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_release: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) -; RV64I-ZALRSC-NEXT: and a6, a3, a4 -; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: lr.w a4, (a3) +; RV64I-ZALRSC-NEXT: and a6, a4, a2 +; RV64I-ZALRSC-NEXT: mv a5, a4 ; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB122_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a1 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 ; RV64I-ZALRSC-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1 -; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB122_1 ; RV64I-ZALRSC-NEXT: # %bb.4: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; 
RV64IA-WMO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NOZACAS-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-WMO-NOZACAS-NEXT: and a6, a3, a4 -; RV64IA-WMO-NOZACAS-NEXT: mv a5, a3 +; RV64IA-WMO-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-WMO-NOZACAS-NEXT: and a6, a4, a2 +; RV64IA-WMO-NOZACAS-NEXT: mv a5, a4 ; RV64IA-WMO-NOZACAS-NEXT: bgeu a6, a1, .LBB122_3 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a1 -; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a1 +; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 ; RV64IA-WMO-NOZACAS-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-NOZACAS-NEXT: bnez a5, .LBB122_1 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.4: -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_umax_i16_release: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NOZACAS-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: lr.w a3, 
(a2) -; RV64IA-TSO-NOZACAS-NEXT: and a6, a3, a4 -; RV64IA-TSO-NOZACAS-NEXT: mv a5, a3 +; RV64IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-NOZACAS-NEXT: and a6, a4, a2 +; RV64IA-TSO-NOZACAS-NEXT: mv a5, a4 ; RV64IA-TSO-NOZACAS-NEXT: bgeu a6, a1, .LBB122_3 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a1 -; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a1 +; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 ; RV64IA-TSO-NOZACAS-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-NOZACAS-NEXT: bnez a5, .LBB122_1 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.4: -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umax_i16_release: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZACAS-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w a3, (a2) -; RV32IA-WMO-ZACAS-NEXT: and a6, a3, a4 -; RV32IA-WMO-ZACAS-NEXT: mv a5, a3 +; RV32IA-WMO-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a6, a4, a2 +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 ; RV32IA-WMO-ZACAS-NEXT: bgeu a6, a1, .LBB122_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a1 -; 
RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a1 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 ; RV32IA-WMO-ZACAS-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB122_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.4: -; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umax_i16_release: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZACAS-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-ZACAS-NEXT: and a6, a3, a4 -; RV32IA-TSO-ZACAS-NEXT: mv a5, a3 +; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-ZACAS-NEXT: and a6, a4, a2 +; RV32IA-TSO-ZACAS-NEXT: mv a5, a4 ; RV32IA-TSO-ZACAS-NEXT: bgeu a6, a1, .LBB122_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a1 -; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a1 +; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 ; RV32IA-TSO-ZACAS-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB122_1 ; 
RV32IA-TSO-ZACAS-NEXT: # %bb.4: -; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umax_i16_release: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 -; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZACAS-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w a3, (a2) -; RV64IA-WMO-ZACAS-NEXT: and a6, a3, a4 -; RV64IA-WMO-ZACAS-NEXT: mv a5, a3 +; RV64IA-WMO-ZACAS-NEXT: lr.w a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a6, a4, a2 +; RV64IA-WMO-ZACAS-NEXT: mv a5, a4 ; RV64IA-WMO-ZACAS-NEXT: bgeu a6, a1, .LBB122_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a1 -; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a1 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 ; RV64IA-WMO-ZACAS-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB122_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.4: -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_umax_i16_release: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; 
RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZACAS-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-ZACAS-NEXT: and a6, a3, a4 -; RV64IA-TSO-ZACAS-NEXT: mv a5, a3 +; RV64IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-ZACAS-NEXT: and a6, a4, a2 +; RV64IA-TSO-ZACAS-NEXT: mv a5, a4 ; RV64IA-TSO-ZACAS-NEXT: bgeu a6, a1, .LBB122_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a1 -; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a1 +; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 ; RV64IA-TSO-ZACAS-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB122_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.4: -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_release: @@ -29420,34 +29420,34 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lhu a3, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: j .LBB123_2 ; RV32I-NEXT: 
.LBB123_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB123_2 Depth=1 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: sh a3, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 4 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: lh a3, 10(sp) ; RV32I-NEXT: bnez a0, .LBB123_4 ; RV32I-NEXT: .LBB123_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: and a0, a3, s2 +; RV32I-NEXT: mv a2, a3 ; RV32I-NEXT: bltu s3, a0, .LBB123_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB123_2 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB123_1 ; RV32I-NEXT: .LBB123_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -29458,77 +29458,77 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_acq_rel: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) -; RV32I-ZALRSC-NEXT: and a6, a3, a4 -; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a3) +; RV32I-ZALRSC-NEXT: and a6, a4, a2 +; RV32I-ZALRSC-NEXT: mv a5, a4 ; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB123_3 ; 
RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a1 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 ; RV32I-ZALRSC-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1 -; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB123_1 ; RV32I-ZALRSC-NEXT: # %bb.4: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-NOZACAS-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2) -; RV32IA-WMO-NOZACAS-NEXT: and a6, a3, a4 -; RV32IA-WMO-NOZACAS-NEXT: mv a5, a3 +; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a3) +; RV32IA-WMO-NOZACAS-NEXT: and a6, a4, a2 +; RV32IA-WMO-NOZACAS-NEXT: mv a5, a4 ; RV32IA-WMO-NOZACAS-NEXT: bgeu a6, a1, .LBB123_3 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a1 -; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a1 +; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 ; RV32IA-WMO-NOZACAS-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1 -; 
RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB123_1 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.4: -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umax_i16_acq_rel: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-NOZACAS-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-NOZACAS-NEXT: and a6, a3, a4 -; RV32IA-TSO-NOZACAS-NEXT: mv a5, a3 +; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-NOZACAS-NEXT: and a6, a4, a2 +; RV32IA-TSO-NOZACAS-NEXT: mv a5, a4 ; RV32IA-TSO-NOZACAS-NEXT: bgeu a6, a1, .LBB123_3 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a1 -; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a1 +; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 ; RV32IA-TSO-NOZACAS-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB123_1 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.4: -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umax_i16_acq_rel: 
@@ -29539,34 +29539,34 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lhu a3, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addi s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: j .LBB123_2 ; RV64I-NEXT: .LBB123_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB123_2 Depth=1 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: sh a3, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: lh a3, 6(sp) ; RV64I-NEXT: bnez a0, .LBB123_4 ; RV64I-NEXT: .LBB123_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: and a0, a3, s2 +; RV64I-NEXT: mv a2, a3 ; RV64I-NEXT: bltu s3, a0, .LBB123_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB123_2 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB123_1 ; RV64I-NEXT: .LBB123_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -29577,177 +29577,177 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_acq_rel: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; 
RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) -; RV64I-ZALRSC-NEXT: and a6, a3, a4 -; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a3) +; RV64I-ZALRSC-NEXT: and a6, a4, a2 +; RV64I-ZALRSC-NEXT: mv a5, a4 ; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB123_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a1 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 ; RV64I-ZALRSC-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1 -; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB123_1 ; RV64I-ZALRSC-NEXT: # %bb.4: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NOZACAS-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2) -; RV64IA-WMO-NOZACAS-NEXT: and a6, a3, a4 -; RV64IA-WMO-NOZACAS-NEXT: mv a5, a3 +; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq 
a4, (a3) +; RV64IA-WMO-NOZACAS-NEXT: and a6, a4, a2 +; RV64IA-WMO-NOZACAS-NEXT: mv a5, a4 ; RV64IA-WMO-NOZACAS-NEXT: bgeu a6, a1, .LBB123_3 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a1 -; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a1 +; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 ; RV64IA-WMO-NOZACAS-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-NOZACAS-NEXT: bnez a5, .LBB123_1 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.4: -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_umax_i16_acq_rel: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NOZACAS-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-NOZACAS-NEXT: and a6, a3, a4 -; RV64IA-TSO-NOZACAS-NEXT: mv a5, a3 +; RV64IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-NOZACAS-NEXT: and a6, a4, a2 +; RV64IA-TSO-NOZACAS-NEXT: mv a5, a4 ; RV64IA-TSO-NOZACAS-NEXT: bgeu a6, a1, .LBB123_3 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a1 -; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-NOZACAS-NEXT: xor 
a5, a3, a5 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a1 +; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 ; RV64IA-TSO-NOZACAS-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-NOZACAS-NEXT: bnez a5, .LBB123_1 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.4: -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umax_i16_acq_rel: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZACAS-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2) -; RV32IA-WMO-ZACAS-NEXT: and a6, a3, a4 -; RV32IA-WMO-ZACAS-NEXT: mv a5, a3 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a6, a4, a2 +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 ; RV32IA-WMO-ZACAS-NEXT: bgeu a6, a1, .LBB123_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a1 -; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a1 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 ; RV32IA-WMO-ZACAS-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB123_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.4: -; 
RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umax_i16_acq_rel: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZACAS-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-ZACAS-NEXT: and a6, a3, a4 -; RV32IA-TSO-ZACAS-NEXT: mv a5, a3 +; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-ZACAS-NEXT: and a6, a4, a2 +; RV32IA-TSO-ZACAS-NEXT: mv a5, a4 ; RV32IA-TSO-ZACAS-NEXT: bgeu a6, a1, .LBB123_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a1 -; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a1 +; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 ; RV32IA-TSO-ZACAS-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB123_1 ; RV32IA-TSO-ZACAS-NEXT: # %bb.4: -; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umax_i16_acq_rel: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; 
RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZACAS-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2) -; RV64IA-WMO-ZACAS-NEXT: and a6, a3, a4 -; RV64IA-WMO-ZACAS-NEXT: mv a5, a3 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a6, a4, a2 +; RV64IA-WMO-ZACAS-NEXT: mv a5, a4 ; RV64IA-WMO-ZACAS-NEXT: bgeu a6, a1, .LBB123_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a1 -; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a1 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 ; RV64IA-WMO-ZACAS-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB123_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.4: -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_umax_i16_acq_rel: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZACAS-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a2) 
-; RV64IA-TSO-ZACAS-NEXT: and a6, a3, a4 -; RV64IA-TSO-ZACAS-NEXT: mv a5, a3 +; RV64IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-ZACAS-NEXT: and a6, a4, a2 +; RV64IA-TSO-ZACAS-NEXT: mv a5, a4 ; RV64IA-TSO-ZACAS-NEXT: bgeu a6, a1, .LBB123_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a1 -; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a1 +; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 ; RV64IA-TSO-ZACAS-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB123_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.4: -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_acq_rel: @@ -29782,34 +29782,34 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lhu a3, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: j .LBB124_2 ; RV32I-NEXT: .LBB124_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB124_2 Depth=1 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: sh a3, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: lh a3, 10(sp) ; RV32I-NEXT: bnez a0, .LBB124_4 ; RV32I-NEXT: .LBB124_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop 
Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: and a0, a3, s2 +; RV32I-NEXT: mv a2, a3 ; RV32I-NEXT: bltu s3, a0, .LBB124_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB124_2 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB124_1 ; RV32I-NEXT: .LBB124_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -29820,52 +29820,52 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_seq_cst: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) -; RV32I-ZALRSC-NEXT: and a6, a3, a4 -; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a3) +; RV32I-ZALRSC-NEXT: and a6, a4, a2 +; RV32I-ZALRSC-NEXT: mv a5, a4 ; RV32I-ZALRSC-NEXT: bgeu a6, a1, .LBB124_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB124_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a1 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 ; RV32I-ZALRSC-NEXT: .LBB124_3: # in Loop: Header=BB124_1 Depth=1 -; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB124_1 ; 
RV32I-ZALRSC-NEXT: # %bb.4: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: -; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-NOZACAS-NEXT: lui a2, 16 +; RV32IA-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-NOZACAS-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NOZACAS-NEXT: lr.w.aqrl a3, (a2) -; RV32IA-NOZACAS-NEXT: and a6, a3, a4 -; RV32IA-NOZACAS-NEXT: mv a5, a3 +; RV32IA-NOZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV32IA-NOZACAS-NEXT: and a6, a4, a2 +; RV32IA-NOZACAS-NEXT: mv a5, a4 ; RV32IA-NOZACAS-NEXT: bgeu a6, a1, .LBB124_3 ; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB124_1 Depth=1 -; RV32IA-NOZACAS-NEXT: xor a5, a3, a1 -; RV32IA-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-NOZACAS-NEXT: xor a5, a3, a5 +; RV32IA-NOZACAS-NEXT: xor a5, a4, a1 +; RV32IA-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-NOZACAS-NEXT: xor a5, a4, a5 ; RV32IA-NOZACAS-NEXT: .LBB124_3: # in Loop: Header=BB124_1 Depth=1 -; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-NOZACAS-NEXT: bnez a5, .LBB124_1 ; RV32IA-NOZACAS-NEXT: # %bb.4: -; RV32IA-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umax_i16_seq_cst: @@ -29876,34 +29876,34 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; 
RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lhu a3, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addi s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: j .LBB124_2 ; RV64I-NEXT: .LBB124_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB124_2 Depth=1 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: sh a3, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: lh a3, 6(sp) ; RV64I-NEXT: bnez a0, .LBB124_4 ; RV64I-NEXT: .LBB124_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: and a0, a3, s2 +; RV64I-NEXT: mv a2, a3 ; RV64I-NEXT: bltu s3, a0, .LBB124_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB124_2 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB124_1 ; RV64I-NEXT: .LBB124_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -29914,102 +29914,102 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_seq_cst: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1 -; 
RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) -; RV64I-ZALRSC-NEXT: and a6, a3, a4 -; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a3) +; RV64I-ZALRSC-NEXT: and a6, a4, a2 +; RV64I-ZALRSC-NEXT: mv a5, a4 ; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB124_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB124_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a1 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 ; RV64I-ZALRSC-NEXT: .LBB124_3: # in Loop: Header=BB124_1 Depth=1 -; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB124_1 ; RV64I-ZALRSC-NEXT: # %bb.4: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_umax_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: -; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-NOZACAS-NEXT: lui a2, 16 +; RV64IA-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-NOZACAS-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NOZACAS-NEXT: lr.w.aqrl a3, (a2) -; RV64IA-NOZACAS-NEXT: and a6, a3, a4 -; RV64IA-NOZACAS-NEXT: mv a5, a3 +; RV64IA-NOZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV64IA-NOZACAS-NEXT: and a6, a4, a2 +; RV64IA-NOZACAS-NEXT: mv a5, a4 ; RV64IA-NOZACAS-NEXT: bgeu a6, a1, .LBB124_3 ; RV64IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB124_1 Depth=1 -; RV64IA-NOZACAS-NEXT: xor a5, a3, a1 -; RV64IA-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-NOZACAS-NEXT: xor a5, a3, a5 +; RV64IA-NOZACAS-NEXT: xor a5, a4, a1 +; 
RV64IA-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-NOZACAS-NEXT: xor a5, a4, a5 ; RV64IA-NOZACAS-NEXT: .LBB124_3: # in Loop: Header=BB124_1 Depth=1 -; RV64IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-NOZACAS-NEXT: bnez a5, .LBB124_1 ; RV64IA-NOZACAS-NEXT: # %bb.4: -; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_umax_i16_seq_cst: ; RV32IA-ZACAS: # %bb.0: -; RV32IA-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-ZACAS-NEXT: lui a2, 16 +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-ZACAS-NEXT: lui a3, 16 -; RV32IA-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-ZACAS-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-ZACAS-NEXT: lr.w.aqrl a3, (a2) -; RV32IA-ZACAS-NEXT: and a6, a3, a4 -; RV32IA-ZACAS-NEXT: mv a5, a3 +; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV32IA-ZACAS-NEXT: and a6, a4, a2 +; RV32IA-ZACAS-NEXT: mv a5, a4 ; RV32IA-ZACAS-NEXT: bgeu a6, a1, .LBB124_3 ; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB124_1 Depth=1 -; RV32IA-ZACAS-NEXT: xor a5, a3, a1 -; RV32IA-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-ZACAS-NEXT: xor a5, a4, a1 +; RV32IA-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-ZACAS-NEXT: xor a5, a4, a5 ; RV32IA-ZACAS-NEXT: .LBB124_3: # in Loop: Header=BB124_1 Depth=1 -; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-ZACAS-NEXT: bnez a5, .LBB124_1 ; RV32IA-ZACAS-NEXT: # %bb.4: -; RV32IA-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_umax_i16_seq_cst: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a2, a0, -4 +; 
RV64IA-ZACAS-NEXT: lui a2, 16 +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-ZACAS-NEXT: lui a3, 16 -; RV64IA-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-ZACAS-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w.aqrl a3, (a2) -; RV64IA-ZACAS-NEXT: and a6, a3, a4 -; RV64IA-ZACAS-NEXT: mv a5, a3 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV64IA-ZACAS-NEXT: and a6, a4, a2 +; RV64IA-ZACAS-NEXT: mv a5, a4 ; RV64IA-ZACAS-NEXT: bgeu a6, a1, .LBB124_3 ; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB124_1 Depth=1 -; RV64IA-ZACAS-NEXT: xor a5, a3, a1 -; RV64IA-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-ZACAS-NEXT: xor a5, a4, a1 +; RV64IA-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-ZACAS-NEXT: xor a5, a4, a5 ; RV64IA-ZACAS-NEXT: .LBB124_3: # in Loop: Header=BB124_1 Depth=1 -; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-ZACAS-NEXT: bnez a5, .LBB124_1 ; RV64IA-ZACAS-NEXT: # %bb.4: -; RV64IA-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_seq_cst: @@ -30044,34 +30044,34 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lhu a3, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: j .LBB125_2 ; RV32I-NEXT: .LBB125_1: # %atomicrmw.start ; RV32I-NEXT: # 
in Loop: Header=BB125_2 Depth=1 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: sh a3, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: lh a3, 10(sp) ; RV32I-NEXT: bnez a0, .LBB125_4 ; RV32I-NEXT: .LBB125_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: and a0, a3, s2 +; RV32I-NEXT: mv a2, a3 ; RV32I-NEXT: bgeu s3, a0, .LBB125_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB125_2 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB125_1 ; RV32I-NEXT: .LBB125_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -30082,52 +30082,52 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) -; RV32I-ZALRSC-NEXT: and a6, a3, a4 -; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: lr.w a4, (a3) +; RV32I-ZALRSC-NEXT: and a6, a4, a2 +; RV32I-ZALRSC-NEXT: mv a5, a4 ; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB125_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB125_1 
Depth=1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a1 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 ; RV32I-ZALRSC-NEXT: .LBB125_3: # in Loop: Header=BB125_1 Depth=1 -; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB125_1 ; RV32I-ZALRSC-NEXT: # %bb.4: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i16_monotonic: ; RV32IA-NOZACAS: # %bb.0: -; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-NOZACAS-NEXT: lui a2, 16 +; RV32IA-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-NOZACAS-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-NOZACAS-NEXT: and a6, a3, a4 -; RV32IA-NOZACAS-NEXT: mv a5, a3 +; RV32IA-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-NOZACAS-NEXT: and a6, a4, a2 +; RV32IA-NOZACAS-NEXT: mv a5, a4 ; RV32IA-NOZACAS-NEXT: bgeu a1, a6, .LBB125_3 ; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB125_1 Depth=1 -; RV32IA-NOZACAS-NEXT: xor a5, a3, a1 -; RV32IA-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-NOZACAS-NEXT: xor a5, a3, a5 +; RV32IA-NOZACAS-NEXT: xor a5, a4, a1 +; RV32IA-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-NOZACAS-NEXT: xor a5, a4, a5 ; RV32IA-NOZACAS-NEXT: .LBB125_3: # in Loop: Header=BB125_1 Depth=1 -; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-NOZACAS-NEXT: bnez a5, .LBB125_1 ; RV32IA-NOZACAS-NEXT: # %bb.4: -; RV32IA-NOZACAS-NEXT: srl a0, a3, a0 +; 
RV32IA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umin_i16_monotonic: @@ -30138,34 +30138,34 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lhu a3, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addi s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: j .LBB125_2 ; RV64I-NEXT: .LBB125_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB125_2 Depth=1 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: sh a3, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: lh a3, 6(sp) ; RV64I-NEXT: bnez a0, .LBB125_4 ; RV64I-NEXT: .LBB125_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: and a0, a3, s2 +; RV64I-NEXT: mv a2, a3 ; RV64I-NEXT: bgeu s3, a0, .LBB125_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB125_2 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB125_1 ; RV64I-NEXT: .LBB125_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -30176,102 +30176,102 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; 
RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) -; RV64I-ZALRSC-NEXT: and a6, a3, a4 -; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: lr.w a4, (a3) +; RV64I-ZALRSC-NEXT: and a6, a4, a2 +; RV64I-ZALRSC-NEXT: mv a5, a4 ; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB125_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB125_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a1 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 ; RV64I-ZALRSC-NEXT: .LBB125_3: # in Loop: Header=BB125_1 Depth=1 -; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB125_1 ; RV64I-ZALRSC-NEXT: # %bb.4: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_umin_i16_monotonic: ; RV64IA-NOZACAS: # %bb.0: -; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-NOZACAS-NEXT: lui a2, 16 +; RV64IA-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-NOZACAS-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-NOZACAS-NEXT: and a6, a3, a4 -; RV64IA-NOZACAS-NEXT: mv a5, a3 +; 
RV64IA-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-NOZACAS-NEXT: and a6, a4, a2 +; RV64IA-NOZACAS-NEXT: mv a5, a4 ; RV64IA-NOZACAS-NEXT: bgeu a1, a6, .LBB125_3 ; RV64IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB125_1 Depth=1 -; RV64IA-NOZACAS-NEXT: xor a5, a3, a1 -; RV64IA-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-NOZACAS-NEXT: xor a5, a3, a5 +; RV64IA-NOZACAS-NEXT: xor a5, a4, a1 +; RV64IA-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-NOZACAS-NEXT: xor a5, a4, a5 ; RV64IA-NOZACAS-NEXT: .LBB125_3: # in Loop: Header=BB125_1 Depth=1 -; RV64IA-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-NOZACAS-NEXT: bnez a5, .LBB125_1 ; RV64IA-NOZACAS-NEXT: # %bb.4: -; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_umin_i16_monotonic: ; RV32IA-ZACAS: # %bb.0: -; RV32IA-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-ZACAS-NEXT: lui a2, 16 +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-ZACAS-NEXT: lui a3, 16 -; RV32IA-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-ZACAS-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-ZACAS-NEXT: lr.w a3, (a2) -; RV32IA-ZACAS-NEXT: and a6, a3, a4 -; RV32IA-ZACAS-NEXT: mv a5, a3 +; RV32IA-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-ZACAS-NEXT: and a6, a4, a2 +; RV32IA-ZACAS-NEXT: mv a5, a4 ; RV32IA-ZACAS-NEXT: bgeu a1, a6, .LBB125_3 ; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB125_1 Depth=1 -; RV32IA-ZACAS-NEXT: xor a5, a3, a1 -; RV32IA-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-ZACAS-NEXT: xor a5, a4, a1 +; RV32IA-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-ZACAS-NEXT: xor a5, a4, a5 ; RV32IA-ZACAS-NEXT: .LBB125_3: # in Loop: Header=BB125_1 Depth=1 -; RV32IA-ZACAS-NEXT: 
sc.w a5, a5, (a2) +; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-ZACAS-NEXT: bnez a5, .LBB125_1 ; RV32IA-ZACAS-NEXT: # %bb.4: -; RV32IA-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_umin_i16_monotonic: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-ZACAS-NEXT: lui a2, 16 +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-ZACAS-NEXT: lui a3, 16 -; RV64IA-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-ZACAS-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w a3, (a2) -; RV64IA-ZACAS-NEXT: and a6, a3, a4 -; RV64IA-ZACAS-NEXT: mv a5, a3 +; RV64IA-ZACAS-NEXT: lr.w a4, (a3) +; RV64IA-ZACAS-NEXT: and a6, a4, a2 +; RV64IA-ZACAS-NEXT: mv a5, a4 ; RV64IA-ZACAS-NEXT: bgeu a1, a6, .LBB125_3 ; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB125_1 Depth=1 -; RV64IA-ZACAS-NEXT: xor a5, a3, a1 -; RV64IA-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-ZACAS-NEXT: xor a5, a4, a1 +; RV64IA-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-ZACAS-NEXT: xor a5, a4, a5 ; RV64IA-ZACAS-NEXT: .LBB125_3: # in Loop: Header=BB125_1 Depth=1 -; RV64IA-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-ZACAS-NEXT: bnez a5, .LBB125_1 ; RV64IA-ZACAS-NEXT: # %bb.4: -; RV64IA-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_monotonic: @@ -30306,34 +30306,34 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; 
RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lhu a3, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: j .LBB126_2 ; RV32I-NEXT: .LBB126_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB126_2 Depth=1 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: sh a3, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 2 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: lh a3, 10(sp) ; RV32I-NEXT: bnez a0, .LBB126_4 ; RV32I-NEXT: .LBB126_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: and a0, a3, s2 +; RV32I-NEXT: mv a2, a3 ; RV32I-NEXT: bgeu s3, a0, .LBB126_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB126_2 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB126_1 ; RV32I-NEXT: .LBB126_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -30344,77 +30344,77 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_acquire: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB126_1: # =>This Inner Loop 
Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) -; RV32I-ZALRSC-NEXT: and a6, a3, a4 -; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a3) +; RV32I-ZALRSC-NEXT: and a6, a4, a2 +; RV32I-ZALRSC-NEXT: mv a5, a4 ; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB126_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a1 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 ; RV32I-ZALRSC-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1 -; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB126_1 ; RV32I-ZALRSC-NEXT: # %bb.4: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acquire: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-NOZACAS-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2) -; RV32IA-WMO-NOZACAS-NEXT: and a6, a3, a4 -; RV32IA-WMO-NOZACAS-NEXT: mv a5, a3 +; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a3) +; RV32IA-WMO-NOZACAS-NEXT: and a6, a4, a2 +; RV32IA-WMO-NOZACAS-NEXT: mv a5, a4 ; RV32IA-WMO-NOZACAS-NEXT: bgeu a1, a6, .LBB126_3 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a1 -; RV32IA-WMO-NOZACAS-NEXT: and 
a5, a5, a4 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a1 +; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 ; RV32IA-WMO-NOZACAS-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB126_1 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.4: -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umin_i16_acquire: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-NOZACAS-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-NOZACAS-NEXT: and a6, a3, a4 -; RV32IA-TSO-NOZACAS-NEXT: mv a5, a3 +; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-NOZACAS-NEXT: and a6, a4, a2 +; RV32IA-TSO-NOZACAS-NEXT: mv a5, a4 ; RV32IA-TSO-NOZACAS-NEXT: bgeu a1, a6, .LBB126_3 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a1 -; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a1 +; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 ; RV32IA-TSO-NOZACAS-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, 
a5, (a3) ; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB126_1 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.4: -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umin_i16_acquire: @@ -30425,34 +30425,34 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lhu a3, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addi s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: j .LBB126_2 ; RV64I-NEXT: .LBB126_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB126_2 Depth=1 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: sh a3, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 2 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: lh a3, 6(sp) ; RV64I-NEXT: bnez a0, .LBB126_4 ; RV64I-NEXT: .LBB126_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: and a0, a3, s2 +; RV64I-NEXT: mv a2, a3 ; RV64I-NEXT: bgeu s3, a0, .LBB126_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB126_2 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB126_1 ; RV64I-NEXT: .LBB126_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -30463,177 +30463,177 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_acquire: 
; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) -; RV64I-ZALRSC-NEXT: and a6, a3, a4 -; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a3) +; RV64I-ZALRSC-NEXT: and a6, a4, a2 +; RV64I-ZALRSC-NEXT: mv a5, a4 ; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB126_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a1 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 ; RV64I-ZALRSC-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1 -; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB126_1 ; RV64I-ZALRSC-NEXT: # %bb.4: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acquire: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 
; RV64IA-WMO-NOZACAS-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2) -; RV64IA-WMO-NOZACAS-NEXT: and a6, a3, a4 -; RV64IA-WMO-NOZACAS-NEXT: mv a5, a3 +; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a3) +; RV64IA-WMO-NOZACAS-NEXT: and a6, a4, a2 +; RV64IA-WMO-NOZACAS-NEXT: mv a5, a4 ; RV64IA-WMO-NOZACAS-NEXT: bgeu a1, a6, .LBB126_3 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a1 -; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a1 +; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 ; RV64IA-WMO-NOZACAS-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-WMO-NOZACAS-NEXT: bnez a5, .LBB126_1 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.4: -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_umin_i16_acquire: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NOZACAS-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-NOZACAS-NEXT: and a6, a3, a4 -; RV64IA-TSO-NOZACAS-NEXT: mv a5, a3 +; RV64IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-NOZACAS-NEXT: and a6, a4, a2 +; RV64IA-TSO-NOZACAS-NEXT: mv a5, a4 ; 
RV64IA-TSO-NOZACAS-NEXT: bgeu a1, a6, .LBB126_3 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a1 -; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a1 +; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 ; RV64IA-TSO-NOZACAS-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-NOZACAS-NEXT: bnez a5, .LBB126_1 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.4: -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umin_i16_acquire: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZACAS-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2) -; RV32IA-WMO-ZACAS-NEXT: and a6, a3, a4 -; RV32IA-WMO-ZACAS-NEXT: mv a5, a3 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a6, a4, a2 +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 ; RV32IA-WMO-ZACAS-NEXT: bgeu a1, a6, .LBB126_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a1 -; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a1 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 ; 
RV32IA-WMO-ZACAS-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB126_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.4: -; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umin_i16_acquire: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZACAS-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-ZACAS-NEXT: and a6, a3, a4 -; RV32IA-TSO-ZACAS-NEXT: mv a5, a3 +; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-ZACAS-NEXT: and a6, a4, a2 +; RV32IA-TSO-ZACAS-NEXT: mv a5, a4 ; RV32IA-TSO-ZACAS-NEXT: bgeu a1, a6, .LBB126_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a1 -; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a1 +; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 ; RV32IA-TSO-ZACAS-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB126_1 ; RV32IA-TSO-ZACAS-NEXT: # %bb.4: -; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umin_i16_acquire: ; 
RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZACAS-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2) -; RV64IA-WMO-ZACAS-NEXT: and a6, a3, a4 -; RV64IA-WMO-ZACAS-NEXT: mv a5, a3 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a6, a4, a2 +; RV64IA-WMO-ZACAS-NEXT: mv a5, a4 ; RV64IA-WMO-ZACAS-NEXT: bgeu a1, a6, .LBB126_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a1 -; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a1 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 ; RV64IA-WMO-ZACAS-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB126_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.4: -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_umin_i16_acquire: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; 
RV64IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZACAS-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-ZACAS-NEXT: and a6, a3, a4 -; RV64IA-TSO-ZACAS-NEXT: mv a5, a3 +; RV64IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-ZACAS-NEXT: and a6, a4, a2 +; RV64IA-TSO-ZACAS-NEXT: mv a5, a4 ; RV64IA-TSO-ZACAS-NEXT: bgeu a1, a6, .LBB126_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a1 -; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a1 +; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 ; RV64IA-TSO-ZACAS-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB126_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.4: -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_acquire: @@ -30668,34 +30668,34 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lhu a3, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: j .LBB127_2 ; RV32I-NEXT: .LBB127_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB127_2 Depth=1 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: sh a3, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 3 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s0 ; 
RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: lh a3, 10(sp) ; RV32I-NEXT: bnez a0, .LBB127_4 ; RV32I-NEXT: .LBB127_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: and a0, a3, s2 +; RV32I-NEXT: mv a2, a3 ; RV32I-NEXT: bgeu s3, a0, .LBB127_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB127_2 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB127_1 ; RV32I-NEXT: .LBB127_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -30706,77 +30706,77 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_release: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) -; RV32I-ZALRSC-NEXT: and a6, a3, a4 -; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: lr.w a4, (a3) +; RV32I-ZALRSC-NEXT: and a6, a4, a2 +; RV32I-ZALRSC-NEXT: mv a5, a4 ; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB127_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a1 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; 
RV32I-ZALRSC-NEXT: xor a5, a4, a5 ; RV32I-ZALRSC-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1 -; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB127_1 ; RV32I-ZALRSC-NEXT: # %bb.4: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_release: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-NOZACAS-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-WMO-NOZACAS-NEXT: and a6, a3, a4 -; RV32IA-WMO-NOZACAS-NEXT: mv a5, a3 +; RV32IA-WMO-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-WMO-NOZACAS-NEXT: and a6, a4, a2 +; RV32IA-WMO-NOZACAS-NEXT: mv a5, a4 ; RV32IA-WMO-NOZACAS-NEXT: bgeu a1, a6, .LBB127_3 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a1 -; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a1 +; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 ; RV32IA-WMO-NOZACAS-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB127_1 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.4: -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0 ; 
RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umin_i16_release: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-NOZACAS-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-NOZACAS-NEXT: and a6, a3, a4 -; RV32IA-TSO-NOZACAS-NEXT: mv a5, a3 +; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-NOZACAS-NEXT: and a6, a4, a2 +; RV32IA-TSO-NOZACAS-NEXT: mv a5, a4 ; RV32IA-TSO-NOZACAS-NEXT: bgeu a1, a6, .LBB127_3 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a1 -; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a1 +; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 ; RV32IA-TSO-NOZACAS-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB127_1 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.4: -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umin_i16_release: @@ -30787,34 +30787,34 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a1 
-; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lhu a3, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addi s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: j .LBB127_2 ; RV64I-NEXT: .LBB127_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB127_2 Depth=1 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: sh a3, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 3 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: lh a3, 6(sp) ; RV64I-NEXT: bnez a0, .LBB127_4 ; RV64I-NEXT: .LBB127_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: and a0, a3, s2 +; RV64I-NEXT: mv a2, a3 ; RV64I-NEXT: bgeu s3, a0, .LBB127_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB127_2 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB127_1 ; RV64I-NEXT: .LBB127_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -30825,177 +30825,177 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_release: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB127_1: # =>This Inner Loop 
Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) -; RV64I-ZALRSC-NEXT: and a6, a3, a4 -; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: lr.w a4, (a3) +; RV64I-ZALRSC-NEXT: and a6, a4, a2 +; RV64I-ZALRSC-NEXT: mv a5, a4 ; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB127_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a1 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 ; RV64I-ZALRSC-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1 -; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB127_1 ; RV64I-ZALRSC-NEXT: # %bb.4: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_release: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NOZACAS-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-WMO-NOZACAS-NEXT: and a6, a3, a4 -; RV64IA-WMO-NOZACAS-NEXT: mv a5, a3 +; RV64IA-WMO-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-WMO-NOZACAS-NEXT: and a6, a4, a2 +; RV64IA-WMO-NOZACAS-NEXT: mv a5, a4 ; RV64IA-WMO-NOZACAS-NEXT: bgeu a1, a6, .LBB127_3 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a1 -; RV64IA-WMO-NOZACAS-NEXT: and a5, 
a5, a4 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a1 +; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 ; RV64IA-WMO-NOZACAS-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-NOZACAS-NEXT: bnez a5, .LBB127_1 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.4: -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_umin_i16_release: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NOZACAS-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-NOZACAS-NEXT: and a6, a3, a4 -; RV64IA-TSO-NOZACAS-NEXT: mv a5, a3 +; RV64IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-NOZACAS-NEXT: and a6, a4, a2 +; RV64IA-TSO-NOZACAS-NEXT: mv a5, a4 ; RV64IA-TSO-NOZACAS-NEXT: bgeu a1, a6, .LBB127_3 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a1 -; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a1 +; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 ; RV64IA-TSO-NOZACAS-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NOZACAS-NEXT: 
sc.w a5, a5, (a3) ; RV64IA-TSO-NOZACAS-NEXT: bnez a5, .LBB127_1 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.4: -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umin_i16_release: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZACAS-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w a3, (a2) -; RV32IA-WMO-ZACAS-NEXT: and a6, a3, a4 -; RV32IA-WMO-ZACAS-NEXT: mv a5, a3 +; RV32IA-WMO-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a6, a4, a2 +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 ; RV32IA-WMO-ZACAS-NEXT: bgeu a1, a6, .LBB127_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a1 -; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a1 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 ; RV32IA-WMO-ZACAS-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB127_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.4: -; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umin_i16_release: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 +; 
RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZACAS-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-ZACAS-NEXT: and a6, a3, a4 -; RV32IA-TSO-ZACAS-NEXT: mv a5, a3 +; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-ZACAS-NEXT: and a6, a4, a2 +; RV32IA-TSO-ZACAS-NEXT: mv a5, a4 ; RV32IA-TSO-ZACAS-NEXT: bgeu a1, a6, .LBB127_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a1 -; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a1 +; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 ; RV32IA-TSO-ZACAS-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB127_1 ; RV32IA-TSO-ZACAS-NEXT: # %bb.4: -; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umin_i16_release: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 ; 
RV64IA-WMO-ZACAS-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w a3, (a2) -; RV64IA-WMO-ZACAS-NEXT: and a6, a3, a4 -; RV64IA-WMO-ZACAS-NEXT: mv a5, a3 +; RV64IA-WMO-ZACAS-NEXT: lr.w a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a6, a4, a2 +; RV64IA-WMO-ZACAS-NEXT: mv a5, a4 ; RV64IA-WMO-ZACAS-NEXT: bgeu a1, a6, .LBB127_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a1 -; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a1 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 ; RV64IA-WMO-ZACAS-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB127_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.4: -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_umin_i16_release: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZACAS-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-ZACAS-NEXT: and a6, a3, a4 -; RV64IA-TSO-ZACAS-NEXT: mv a5, a3 +; RV64IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-ZACAS-NEXT: and a6, a4, a2 +; RV64IA-TSO-ZACAS-NEXT: mv a5, a4 ; RV64IA-TSO-ZACAS-NEXT: bgeu a1, a6, .LBB127_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: 
Header=BB127_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a1 -; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a1 +; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 ; RV64IA-TSO-ZACAS-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB127_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.4: -; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_release: @@ -31030,34 +31030,34 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lhu a3, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: j .LBB128_2 ; RV32I-NEXT: .LBB128_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB128_2 Depth=1 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: sh a3, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 4 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: lh a3, 10(sp) ; RV32I-NEXT: bnez a0, .LBB128_4 ; RV32I-NEXT: .LBB128_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: and a0, a3, s2 +; RV32I-NEXT: mv a2, a3 ; RV32I-NEXT: bgeu s3, a0, .LBB128_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB128_2 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: 
mv a2, s1 ; RV32I-NEXT: j .LBB128_1 ; RV32I-NEXT: .LBB128_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -31068,77 +31068,77 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_acq_rel: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aq a3, (a2) -; RV32I-ZALRSC-NEXT: and a6, a3, a4 -; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: lr.w.aq a4, (a3) +; RV32I-ZALRSC-NEXT: and a6, a4, a2 +; RV32I-ZALRSC-NEXT: mv a5, a4 ; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB128_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a1 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 ; RV32I-ZALRSC-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1 -; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB128_1 ; RV32I-ZALRSC-NEXT: # %bb.4: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acq_rel: ; RV32IA-WMO-NOZACAS: # %bb.0: -; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16 +; 
RV32IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-NOZACAS-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2) -; RV32IA-WMO-NOZACAS-NEXT: and a6, a3, a4 -; RV32IA-WMO-NOZACAS-NEXT: mv a5, a3 +; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a3) +; RV32IA-WMO-NOZACAS-NEXT: and a6, a4, a2 +; RV32IA-WMO-NOZACAS-NEXT: mv a5, a4 ; RV32IA-WMO-NOZACAS-NEXT: bgeu a1, a6, .LBB128_3 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a1 -; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a1 +; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 ; RV32IA-WMO-NOZACAS-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1 -; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB128_1 ; RV32IA-WMO-NOZACAS-NEXT: # %bb.4: -; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-NOZACAS-NEXT: ret ; ; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umin_i16_acq_rel: ; RV32IA-TSO-NOZACAS: # %bb.0: -; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-NOZACAS-NEXT: and 
a1, a1, a2 +; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-NOZACAS-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV32IA-TSO-NOZACAS-NEXT: and a6, a3, a4 -; RV32IA-TSO-NOZACAS-NEXT: mv a5, a3 +; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-NOZACAS-NEXT: and a6, a4, a2 +; RV32IA-TSO-NOZACAS-NEXT: mv a5, a4 ; RV32IA-TSO-NOZACAS-NEXT: bgeu a1, a6, .LBB128_3 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a1 -; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a1 +; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 ; RV32IA-TSO-NOZACAS-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1 -; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB128_1 ; RV32IA-TSO-NOZACAS-NEXT: # %bb.4: -; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umin_i16_acq_rel: @@ -31149,34 +31149,34 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lhu a3, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addi s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: j .LBB128_2 ; RV64I-NEXT: .LBB128_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB128_2 Depth=1 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: sh a3, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s1 +; 
RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: lh a3, 6(sp) ; RV64I-NEXT: bnez a0, .LBB128_4 ; RV64I-NEXT: .LBB128_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: and a0, a3, s2 +; RV64I-NEXT: mv a2, a3 ; RV64I-NEXT: bgeu s3, a0, .LBB128_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB128_2 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB128_1 ; RV64I-NEXT: .LBB128_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -31187,177 +31187,177 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_acq_rel: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aq a3, (a2) -; RV64I-ZALRSC-NEXT: and a6, a3, a4 -; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: lr.w.aq a4, (a3) +; RV64I-ZALRSC-NEXT: and a6, a4, a2 +; RV64I-ZALRSC-NEXT: mv a5, a4 ; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB128_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a1 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 
+; RV64I-ZALRSC-NEXT: xor a5, a4, a5 ; RV64I-ZALRSC-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1 -; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB128_1 ; RV64I-ZALRSC-NEXT: # %bb.4: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acq_rel: ; RV64IA-WMO-NOZACAS: # %bb.0: -; RV64IA-WMO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-NOZACAS-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2) -; RV64IA-WMO-NOZACAS-NEXT: and a6, a3, a4 -; RV64IA-WMO-NOZACAS-NEXT: mv a5, a3 +; RV64IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a3) +; RV64IA-WMO-NOZACAS-NEXT: and a6, a4, a2 +; RV64IA-WMO-NOZACAS-NEXT: mv a5, a4 ; RV64IA-WMO-NOZACAS-NEXT: bgeu a1, a6, .LBB128_3 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a1 -; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-WMO-NOZACAS-NEXT: xor a5, a3, a5 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a1 +; RV64IA-WMO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-NOZACAS-NEXT: xor a5, a4, a5 ; RV64IA-WMO-NOZACAS-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1 -; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-NOZACAS-NEXT: bnez a5, .LBB128_1 ; RV64IA-WMO-NOZACAS-NEXT: # %bb.4: -; RV64IA-WMO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-NOZACAS-NEXT: srlw a0, 
a4, a0 ; RV64IA-WMO-NOZACAS-NEXT: ret ; ; RV64IA-TSO-NOZACAS-LABEL: atomicrmw_umin_i16_acq_rel: ; RV64IA-TSO-NOZACAS: # %bb.0: -; RV64IA-TSO-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-NOZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-NOZACAS-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-NOZACAS-NEXT: and a6, a3, a4 -; RV64IA-TSO-NOZACAS-NEXT: mv a5, a3 +; RV64IA-TSO-NOZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-NOZACAS-NEXT: and a6, a4, a2 +; RV64IA-TSO-NOZACAS-NEXT: mv a5, a4 ; RV64IA-TSO-NOZACAS-NEXT: bgeu a1, a6, .LBB128_3 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a1 -; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-NOZACAS-NEXT: xor a5, a3, a5 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a1 +; RV64IA-TSO-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-NOZACAS-NEXT: xor a5, a4, a5 ; RV64IA-TSO-NOZACAS-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1 -; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-NOZACAS-NEXT: bnez a5, .LBB128_1 ; RV64IA-TSO-NOZACAS-NEXT: # %bb.4: -; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-NOZACAS-NEXT: ret ; ; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umin_i16_acq_rel: ; RV32IA-WMO-ZACAS: # %bb.0: -; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV32IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; 
RV32IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-WMO-ZACAS-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2) -; RV32IA-WMO-ZACAS-NEXT: and a6, a3, a4 -; RV32IA-WMO-ZACAS-NEXT: mv a5, a3 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a3) +; RV32IA-WMO-ZACAS-NEXT: and a6, a4, a2 +; RV32IA-WMO-ZACAS-NEXT: mv a5, a4 ; RV32IA-WMO-ZACAS-NEXT: bgeu a1, a6, .LBB128_3 ; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a1 -; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a1 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5 ; RV32IA-WMO-ZACAS-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1 -; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB128_1 ; RV32IA-WMO-ZACAS-NEXT: # %bb.4: -; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-WMO-ZACAS-NEXT: ret ; ; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umin_i16_acq_rel: ; RV32IA-TSO-ZACAS: # %bb.0: -; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV32IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0 -; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-TSO-ZACAS-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; 
RV32IA-TSO-ZACAS-NEXT: and a6, a3, a4 -; RV32IA-TSO-ZACAS-NEXT: mv a5, a3 +; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV32IA-TSO-ZACAS-NEXT: and a6, a4, a2 +; RV32IA-TSO-ZACAS-NEXT: mv a5, a4 ; RV32IA-TSO-ZACAS-NEXT: bgeu a1, a6, .LBB128_3 ; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a1 -; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a1 +; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5 ; RV32IA-TSO-ZACAS-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1 -; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB128_1 ; RV32IA-TSO-ZACAS-NEXT: # %bb.4: -; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-TSO-ZACAS-NEXT: ret ; ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umin_i16_acq_rel: ; RV64IA-WMO-ZACAS: # %bb.0: -; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: lui a2, 16 +; RV64IA-WMO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 -; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-WMO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-WMO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-WMO-ZACAS-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2) -; RV64IA-WMO-ZACAS-NEXT: and a6, a3, a4 -; RV64IA-WMO-ZACAS-NEXT: mv a5, a3 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a3) +; RV64IA-WMO-ZACAS-NEXT: and a6, a4, a2 +; RV64IA-WMO-ZACAS-NEXT: mv a5, a4 ; RV64IA-WMO-ZACAS-NEXT: bgeu a1, a6, .LBB128_3 ; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a1 -; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a4 -; 
RV64IA-WMO-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a1 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a4, a5 ; RV64IA-WMO-ZACAS-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1 -; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB128_1 ; RV64IA-WMO-ZACAS-NEXT: # %bb.4: -; RV64IA-WMO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-WMO-ZACAS-NEXT: ret ; ; RV64IA-TSO-ZACAS-LABEL: atomicrmw_umin_i16_acq_rel: ; RV64IA-TSO-ZACAS: # %bb.0: -; RV64IA-TSO-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: lui a2, 16 +; RV64IA-TSO-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 -; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-TSO-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-TSO-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-TSO-ZACAS-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-TSO-ZACAS-NEXT: lr.w a3, (a2) -; RV64IA-TSO-ZACAS-NEXT: and a6, a3, a4 -; RV64IA-TSO-ZACAS-NEXT: mv a5, a3 +; RV64IA-TSO-ZACAS-NEXT: lr.w a4, (a3) +; RV64IA-TSO-ZACAS-NEXT: and a6, a4, a2 +; RV64IA-TSO-ZACAS-NEXT: mv a5, a4 ; RV64IA-TSO-ZACAS-NEXT: bgeu a1, a6, .LBB128_3 ; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a1 -; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-TSO-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a1 +; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a4, a5 ; RV64IA-TSO-ZACAS-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1 -; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2) +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a3) ; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB128_1 ; RV64IA-TSO-ZACAS-NEXT: # %bb.4: -; 
RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-TSO-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_acq_rel: @@ -31392,34 +31392,34 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lhu a3, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: j .LBB129_2 ; RV32I-NEXT: .LBB129_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB129_2 Depth=1 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: sh a3, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: lh a3, 10(sp) ; RV32I-NEXT: bnez a0, .LBB129_4 ; RV32I-NEXT: .LBB129_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: and a0, a3, s2 +; RV32I-NEXT: mv a2, a3 ; RV32I-NEXT: bgeu s3, a0, .LBB129_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB129_2 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB129_1 ; RV32I-NEXT: .LBB129_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -31430,52 +31430,52 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind { ; ; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_seq_cst: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; 
RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) -; RV32I-ZALRSC-NEXT: and a6, a3, a4 -; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: lr.w.aqrl a4, (a3) +; RV32I-ZALRSC-NEXT: and a6, a4, a2 +; RV32I-ZALRSC-NEXT: mv a5, a4 ; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB129_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB129_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a1 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 ; RV32I-ZALRSC-NEXT: .LBB129_3: # in Loop: Header=BB129_1 Depth=1 -; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV32I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB129_1 ; RV32I-ZALRSC-NEXT: # %bb.4: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: ret ; ; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i16_seq_cst: ; RV32IA-NOZACAS: # %bb.0: -; RV32IA-NOZACAS-NEXT: andi a2, a0, -4 +; RV32IA-NOZACAS-NEXT: lui a2, 16 +; RV32IA-NOZACAS-NEXT: andi a3, a0, -4 +; RV32IA-NOZACAS-NEXT: addi a2, a2, -1 ; RV32IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV32IA-NOZACAS-NEXT: lui a3, 16 -; RV32IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV32IA-NOZACAS-NEXT: sll a4, a3, a0 -; RV32IA-NOZACAS-NEXT: and a1, a1, a3 +; RV32IA-NOZACAS-NEXT: and a1, a1, a2 +; RV32IA-NOZACAS-NEXT: sll a2, a2, a0 ; RV32IA-NOZACAS-NEXT: sll a1, a1, a0 ; RV32IA-NOZACAS-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NOZACAS-NEXT: lr.w.aqrl a3, (a2) -; 
RV32IA-NOZACAS-NEXT: and a6, a3, a4 -; RV32IA-NOZACAS-NEXT: mv a5, a3 +; RV32IA-NOZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV32IA-NOZACAS-NEXT: and a6, a4, a2 +; RV32IA-NOZACAS-NEXT: mv a5, a4 ; RV32IA-NOZACAS-NEXT: bgeu a1, a6, .LBB129_3 ; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB129_1 Depth=1 -; RV32IA-NOZACAS-NEXT: xor a5, a3, a1 -; RV32IA-NOZACAS-NEXT: and a5, a5, a4 -; RV32IA-NOZACAS-NEXT: xor a5, a3, a5 +; RV32IA-NOZACAS-NEXT: xor a5, a4, a1 +; RV32IA-NOZACAS-NEXT: and a5, a5, a2 +; RV32IA-NOZACAS-NEXT: xor a5, a4, a5 ; RV32IA-NOZACAS-NEXT: .LBB129_3: # in Loop: Header=BB129_1 Depth=1 -; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-NOZACAS-NEXT: bnez a5, .LBB129_1 ; RV32IA-NOZACAS-NEXT: # %bb.4: -; RV32IA-NOZACAS-NEXT: srl a0, a3, a0 +; RV32IA-NOZACAS-NEXT: srl a0, a4, a0 ; RV32IA-NOZACAS-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umin_i16_seq_cst: @@ -31486,34 +31486,34 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lhu a3, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addi s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: j .LBB129_2 ; RV64I-NEXT: .LBB129_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB129_2 Depth=1 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: sh a3, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: lh a3, 6(sp) ; RV64I-NEXT: bnez a0, .LBB129_4 ; RV64I-NEXT: .LBB129_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 -; 
RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: and a0, a3, s2 +; RV64I-NEXT: mv a2, a3 ; RV64I-NEXT: bgeu s3, a0, .LBB129_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB129_2 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB129_1 ; RV64I-NEXT: .LBB129_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -31524,102 +31524,102 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind { ; ; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_seq_cst: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w.aqrl a3, (a2) -; RV64I-ZALRSC-NEXT: and a6, a3, a4 -; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: lr.w.aqrl a4, (a3) +; RV64I-ZALRSC-NEXT: and a6, a4, a2 +; RV64I-ZALRSC-NEXT: mv a5, a4 ; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB129_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB129_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a1 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 ; RV64I-ZALRSC-NEXT: .LBB129_3: # in Loop: Header=BB129_1 Depth=1 -; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a2) +; RV64I-ZALRSC-NEXT: sc.w.rl a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB129_1 ; RV64I-ZALRSC-NEXT: # %bb.4: -; RV64I-ZALRSC-NEXT: 
srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: ret ; ; RV64IA-NOZACAS-LABEL: atomicrmw_umin_i16_seq_cst: ; RV64IA-NOZACAS: # %bb.0: -; RV64IA-NOZACAS-NEXT: andi a2, a0, -4 +; RV64IA-NOZACAS-NEXT: lui a2, 16 +; RV64IA-NOZACAS-NEXT: andi a3, a0, -4 +; RV64IA-NOZACAS-NEXT: addi a2, a2, -1 ; RV64IA-NOZACAS-NEXT: slli a0, a0, 3 -; RV64IA-NOZACAS-NEXT: lui a3, 16 -; RV64IA-NOZACAS-NEXT: addi a3, a3, -1 -; RV64IA-NOZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-NOZACAS-NEXT: and a1, a1, a3 +; RV64IA-NOZACAS-NEXT: and a1, a1, a2 +; RV64IA-NOZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-NOZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-NOZACAS-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NOZACAS-NEXT: lr.w.aqrl a3, (a2) -; RV64IA-NOZACAS-NEXT: and a6, a3, a4 -; RV64IA-NOZACAS-NEXT: mv a5, a3 +; RV64IA-NOZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV64IA-NOZACAS-NEXT: and a6, a4, a2 +; RV64IA-NOZACAS-NEXT: mv a5, a4 ; RV64IA-NOZACAS-NEXT: bgeu a1, a6, .LBB129_3 ; RV64IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB129_1 Depth=1 -; RV64IA-NOZACAS-NEXT: xor a5, a3, a1 -; RV64IA-NOZACAS-NEXT: and a5, a5, a4 -; RV64IA-NOZACAS-NEXT: xor a5, a3, a5 +; RV64IA-NOZACAS-NEXT: xor a5, a4, a1 +; RV64IA-NOZACAS-NEXT: and a5, a5, a2 +; RV64IA-NOZACAS-NEXT: xor a5, a4, a5 ; RV64IA-NOZACAS-NEXT: .LBB129_3: # in Loop: Header=BB129_1 Depth=1 -; RV64IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-NOZACAS-NEXT: bnez a5, .LBB129_1 ; RV64IA-NOZACAS-NEXT: # %bb.4: -; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_umin_i16_seq_cst: ; RV32IA-ZACAS: # %bb.0: -; RV32IA-ZACAS-NEXT: andi a2, a0, -4 +; RV32IA-ZACAS-NEXT: lui a2, 16 +; RV32IA-ZACAS-NEXT: andi a3, a0, -4 +; RV32IA-ZACAS-NEXT: addi a2, a2, -1 ; RV32IA-ZACAS-NEXT: slli a0, a0, 3 -; RV32IA-ZACAS-NEXT: lui a3, 16 -; RV32IA-ZACAS-NEXT: addi a3, a3, -1 -; RV32IA-ZACAS-NEXT: sll a4, a3, a0 -; 
RV32IA-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-ZACAS-NEXT: and a1, a1, a2 +; RV32IA-ZACAS-NEXT: sll a2, a2, a0 ; RV32IA-ZACAS-NEXT: sll a1, a1, a0 ; RV32IA-ZACAS-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-ZACAS-NEXT: lr.w.aqrl a3, (a2) -; RV32IA-ZACAS-NEXT: and a6, a3, a4 -; RV32IA-ZACAS-NEXT: mv a5, a3 +; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV32IA-ZACAS-NEXT: and a6, a4, a2 +; RV32IA-ZACAS-NEXT: mv a5, a4 ; RV32IA-ZACAS-NEXT: bgeu a1, a6, .LBB129_3 ; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB129_1 Depth=1 -; RV32IA-ZACAS-NEXT: xor a5, a3, a1 -; RV32IA-ZACAS-NEXT: and a5, a5, a4 -; RV32IA-ZACAS-NEXT: xor a5, a3, a5 +; RV32IA-ZACAS-NEXT: xor a5, a4, a1 +; RV32IA-ZACAS-NEXT: and a5, a5, a2 +; RV32IA-ZACAS-NEXT: xor a5, a4, a5 ; RV32IA-ZACAS-NEXT: .LBB129_3: # in Loop: Header=BB129_1 Depth=1 -; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV32IA-ZACAS-NEXT: bnez a5, .LBB129_1 ; RV32IA-ZACAS-NEXT: # %bb.4: -; RV32IA-ZACAS-NEXT: srl a0, a3, a0 +; RV32IA-ZACAS-NEXT: srl a0, a4, a0 ; RV32IA-ZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_umin_i16_seq_cst: ; RV64IA-ZACAS: # %bb.0: -; RV64IA-ZACAS-NEXT: andi a2, a0, -4 +; RV64IA-ZACAS-NEXT: lui a2, 16 +; RV64IA-ZACAS-NEXT: andi a3, a0, -4 +; RV64IA-ZACAS-NEXT: addi a2, a2, -1 ; RV64IA-ZACAS-NEXT: slli a0, a0, 3 -; RV64IA-ZACAS-NEXT: lui a3, 16 -; RV64IA-ZACAS-NEXT: addi a3, a3, -1 -; RV64IA-ZACAS-NEXT: sllw a4, a3, a0 -; RV64IA-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-ZACAS-NEXT: and a1, a1, a2 +; RV64IA-ZACAS-NEXT: sllw a2, a2, a0 ; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 ; RV64IA-ZACAS-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-ZACAS-NEXT: lr.w.aqrl a3, (a2) -; RV64IA-ZACAS-NEXT: and a6, a3, a4 -; RV64IA-ZACAS-NEXT: mv a5, a3 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a4, (a3) +; RV64IA-ZACAS-NEXT: and a6, a4, a2 +; RV64IA-ZACAS-NEXT: mv a5, a4 ; RV64IA-ZACAS-NEXT: bgeu a1, a6, .LBB129_3 ; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB129_1 Depth=1 -; 
RV64IA-ZACAS-NEXT: xor a5, a3, a1 -; RV64IA-ZACAS-NEXT: and a5, a5, a4 -; RV64IA-ZACAS-NEXT: xor a5, a3, a5 +; RV64IA-ZACAS-NEXT: xor a5, a4, a1 +; RV64IA-ZACAS-NEXT: and a5, a5, a2 +; RV64IA-ZACAS-NEXT: xor a5, a4, a5 ; RV64IA-ZACAS-NEXT: .LBB129_3: # in Loop: Header=BB129_1 Depth=1 -; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2) +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a3) ; RV64IA-ZACAS-NEXT: bnez a5, .LBB129_1 ; RV64IA-ZACAS-NEXT: # %bb.4: -; RV64IA-ZACAS-NEXT: srlw a0, a3, a0 +; RV64IA-ZACAS-NEXT: srlw a0, a4, a0 ; RV64IA-ZACAS-NEXT: ret ; ; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_seq_cst: diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll index 7fe5fa7365eb5..70307c1764e8d 100644 --- a/llvm/test/CodeGen/RISCV/atomic-signext.ll +++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll @@ -521,37 +521,37 @@ define signext i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_and_i8_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: li a3, 255 +; RV32IA-NEXT: sll a3, a3, a2 ; RV32IA-NEXT: zext.b a1, a1 -; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: not a3, a3 -; RV32IA-NEXT: sll a1, a1, a0 +; RV32IA-NEXT: sll a1, a1, a2 +; RV32IA-NEXT: andi a0, a0, -4 ; RV32IA-NEXT: or a1, a1, a3 -; RV32IA-NEXT: amoand.w a1, a1, (a2) -; RV32IA-NEXT: srl a0, a1, a0 +; RV32IA-NEXT: amoand.w a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: slli a0, a0, 24 ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; ; RV32I-ZALRSC-LABEL: atomicrmw_and_i8_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: li a3, 255 +; RV32I-ZALRSC-NEXT: sll a3, a3, a2 ; RV32I-ZALRSC-NEXT: zext.b a1, a1 -; RV32I-ZALRSC-NEXT: sll a3, a3, a0 ; RV32I-ZALRSC-NEXT: not a3, a3 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: sll 
a1, a1, a2 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 ; RV32I-ZALRSC-NEXT: or a1, a1, a3 ; RV32I-ZALRSC-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) ; RV32I-ZALRSC-NEXT: and a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB6_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: slli a0, a0, 24 ; RV32I-ZALRSC-NEXT: srai a0, a0, 24 ; RV32I-ZALRSC-NEXT: ret @@ -570,37 +570,37 @@ define signext i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_and_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: li a3, 255 +; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: zext.b a1, a1 -; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: not a3, a3 -; RV64IA-NEXT: sllw a1, a1, a0 +; RV64IA-NEXT: sllw a1, a1, a2 +; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: or a1, a1, a3 -; RV64IA-NEXT: amoand.w a1, a1, (a2) -; RV64IA-NEXT: srlw a0, a1, a0 +; RV64IA-NEXT: amoand.w a0, a1, (a0) +; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret ; ; RV64I-ZALRSC-LABEL: atomicrmw_and_i8_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: li a3, 255 +; RV64I-ZALRSC-NEXT: sllw a3, a3, a2 ; RV64I-ZALRSC-NEXT: zext.b a1, a1 -; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 ; RV64I-ZALRSC-NEXT: not a3, a3 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 ; RV64I-ZALRSC-NEXT: or a1, a1, a3 ; RV64I-ZALRSC-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) ; RV64I-ZALRSC-NEXT: and 
a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB6_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: slli a0, a0, 56 ; RV64I-ZALRSC-NEXT: srai a0, a0, 56 ; RV64I-ZALRSC-NEXT: ret @@ -743,29 +743,29 @@ define signext i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_or_i8_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: zext.b a1, a1 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: amoor.w a1, a1, (a2) -; RV32IA-NEXT: srl a0, a1, a0 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: sll a1, a1, a2 +; RV32IA-NEXT: amoor.w a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: slli a0, a0, 24 ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; ; RV32I-ZALRSC-LABEL: atomicrmw_or_i8_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: zext.b a1, a1 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) ; RV32I-ZALRSC-NEXT: or a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB8_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: slli a0, a0, 24 ; RV32I-ZALRSC-NEXT: srai a0, a0, 24 ; RV32I-ZALRSC-NEXT: ret @@ -784,29 +784,29 @@ define signext i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_or_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slli a2, 
a0, 3 ; RV64IA-NEXT: zext.b a1, a1 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: amoor.w a1, a1, (a2) -; RV64IA-NEXT: srlw a0, a1, a0 +; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: sllw a1, a1, a2 +; RV64IA-NEXT: amoor.w a0, a1, (a0) +; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret ; ; RV64I-ZALRSC-LABEL: atomicrmw_or_i8_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: zext.b a1, a1 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) ; RV64I-ZALRSC-NEXT: or a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB8_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: slli a0, a0, 56 ; RV64I-ZALRSC-NEXT: srai a0, a0, 56 ; RV64I-ZALRSC-NEXT: ret @@ -829,29 +829,29 @@ define signext i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_xor_i8_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: zext.b a1, a1 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: amoxor.w a1, a1, (a2) -; RV32IA-NEXT: srl a0, a1, a0 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: sll a1, a1, a2 +; RV32IA-NEXT: amoxor.w a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: slli a0, a0, 24 ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; ; RV32I-ZALRSC-LABEL: atomicrmw_xor_i8_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: zext.b a1, 
a1 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) ; RV32I-ZALRSC-NEXT: xor a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB9_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: slli a0, a0, 24 ; RV32I-ZALRSC-NEXT: srai a0, a0, 24 ; RV32I-ZALRSC-NEXT: ret @@ -870,29 +870,29 @@ define signext i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_xor_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: zext.b a1, a1 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: amoxor.w a1, a1, (a2) -; RV64IA-NEXT: srlw a0, a1, a0 +; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: sllw a1, a1, a2 +; RV64IA-NEXT: amoxor.w a0, a1, (a0) +; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret ; ; RV64I-ZALRSC-LABEL: atomicrmw_xor_i8_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: zext.b a1, a1 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) ; RV64I-ZALRSC-NEXT: xor a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB9_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: slli a0, a0, 56 ; 
RV64I-ZALRSC-NEXT: srai a0, a0, 56 ; RV64I-ZALRSC-NEXT: ret @@ -949,8 +949,8 @@ define signext i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 -; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: andi a4, a0, 24 +; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 @@ -980,8 +980,8 @@ define signext i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-ZALRSC-NEXT: andi a2, a0, -4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: li a3, 255 -; RV32I-ZALRSC-NEXT: slli a1, a1, 24 ; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 ; RV32I-ZALRSC-NEXT: sll a3, a3, a0 ; RV32I-ZALRSC-NEXT: srai a1, a1, 24 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 @@ -1054,8 +1054,8 @@ define signext i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: andi a4, a0, 24 +; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 @@ -1085,8 +1085,8 @@ define signext i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-ZALRSC-NEXT: andi a2, a0, -4 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: li a3, 255 -; RV64I-ZALRSC-NEXT: slli a1, a1, 56 ; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 ; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 ; RV64I-ZALRSC-NEXT: srai a1, a1, 56 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 @@ -1163,8 +1163,8 @@ define signext i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 -; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: andi a4, a0, 24 +; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, 
a0 @@ -1194,8 +1194,8 @@ define signext i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-ZALRSC-NEXT: andi a2, a0, -4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: li a3, 255 -; RV32I-ZALRSC-NEXT: slli a1, a1, 24 ; RV32I-ZALRSC-NEXT: andi a4, a0, 24 +; RV32I-ZALRSC-NEXT: slli a1, a1, 24 ; RV32I-ZALRSC-NEXT: sll a3, a3, a0 ; RV32I-ZALRSC-NEXT: srai a1, a1, 24 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 @@ -1268,8 +1268,8 @@ define signext i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: andi a4, a0, 24 +; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 @@ -1299,8 +1299,8 @@ define signext i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-ZALRSC-NEXT: andi a2, a0, -4 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: li a3, 255 -; RV64I-ZALRSC-NEXT: slli a1, a1, 56 ; RV64I-ZALRSC-NEXT: andi a4, a0, 24 +; RV64I-ZALRSC-NEXT: slli a1, a1, 56 ; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 ; RV64I-ZALRSC-NEXT: srai a1, a1, 56 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 @@ -1723,46 +1723,46 @@ define signext i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_xchg_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 +; RV32IA-NEXT: lui a2, 16 +; RV32IA-NEXT: andi a3, a0, -4 +; RV32IA-NEXT: addi a2, a2, -1 ; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a1, a1, a2 +; RV32IA-NEXT: sll a2, a2, a0 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a2) +; RV32IA-NEXT: lr.w a4, (a3) ; RV32IA-NEXT: mv a5, a1 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a3, a5 -; 
RV32IA-NEXT: sc.w a5, a5, (a2) +; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: and a5, a5, a2 +; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: sc.w a5, a5, (a3) ; RV32IA-NEXT: bnez a5, .LBB14_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a3, a0 +; RV32IA-NEXT: srl a0, a4, a0 ; RV32IA-NEXT: slli a0, a0, 16 ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; ; RV32I-ZALRSC-LABEL: atomicrmw_xchg_i16_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w a4, (a3) ; RV32I-ZALRSC-NEXT: mv a5, a1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB14_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: slli a0, a0, 16 ; RV32I-ZALRSC-NEXT: srai a0, a0, 16 ; RV32I-ZALRSC-NEXT: ret @@ -1781,46 +1781,46 @@ define signext i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_xchg_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 +; RV64IA-NEXT: lui a2, 16 +; RV64IA-NEXT: andi a3, a0, -4 +; RV64IA-NEXT: addi a2, a2, -1 ; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addi a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, a0 
-; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: and a1, a1, a2 +; RV64IA-NEXT: sllw a2, a2, a0 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a2) +; RV64IA-NEXT: lr.w a4, (a3) ; RV64IA-NEXT: mv a5, a1 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: sc.w a5, a5, (a2) +; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: and a5, a5, a2 +; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: sc.w a5, a5, (a3) ; RV64IA-NEXT: bnez a5, .LBB14_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a3, a0 +; RV64IA-NEXT: srlw a0, a4, a0 ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret ; ; RV64I-ZALRSC-LABEL: atomicrmw_xchg_i16_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w a4, (a3) ; RV64I-ZALRSC-NEXT: mv a5, a1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB14_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: slli a0, a0, 48 ; RV64I-ZALRSC-NEXT: srai a0, a0, 48 ; RV64I-ZALRSC-NEXT: ret @@ -1843,46 +1843,46 @@ define 
signext i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_add_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 +; RV32IA-NEXT: lui a2, 16 +; RV32IA-NEXT: andi a3, a0, -4 +; RV32IA-NEXT: addi a2, a2, -1 ; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a1, a1, a2 +; RV32IA-NEXT: sll a2, a2, a0 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a2) -; RV32IA-NEXT: add a5, a3, a1 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: sc.w a5, a5, (a2) +; RV32IA-NEXT: lr.w a4, (a3) +; RV32IA-NEXT: add a5, a4, a1 +; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: and a5, a5, a2 +; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: sc.w a5, a5, (a3) ; RV32IA-NEXT: bnez a5, .LBB15_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a3, a0 +; RV32IA-NEXT: srl a0, a4, a0 ; RV32IA-NEXT: slli a0, a0, 16 ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; ; RV32I-ZALRSC-LABEL: atomicrmw_add_i16_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) -; RV32I-ZALRSC-NEXT: add a5, a3, a1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: lr.w a4, (a3) +; 
RV32I-ZALRSC-NEXT: add a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB15_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: slli a0, a0, 16 ; RV32I-ZALRSC-NEXT: srai a0, a0, 16 ; RV32I-ZALRSC-NEXT: ret @@ -1901,46 +1901,46 @@ define signext i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_add_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 +; RV64IA-NEXT: lui a2, 16 +; RV64IA-NEXT: andi a3, a0, -4 +; RV64IA-NEXT: addi a2, a2, -1 ; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addi a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, a0 -; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: and a1, a1, a2 +; RV64IA-NEXT: sllw a2, a2, a0 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a2) -; RV64IA-NEXT: add a5, a3, a1 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: sc.w a5, a5, (a2) +; RV64IA-NEXT: lr.w a4, (a3) +; RV64IA-NEXT: add a5, a4, a1 +; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: and a5, a5, a2 +; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: sc.w a5, a5, (a3) ; RV64IA-NEXT: bnez a5, .LBB15_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a3, a0 +; RV64IA-NEXT: srlw a0, a4, a0 ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret ; ; RV64I-ZALRSC-LABEL: atomicrmw_add_i16_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, 
a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) -; RV64I-ZALRSC-NEXT: add a5, a3, a1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: lr.w a4, (a3) +; RV64I-ZALRSC-NEXT: add a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB15_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: slli a0, a0, 48 ; RV64I-ZALRSC-NEXT: srai a0, a0, 48 ; RV64I-ZALRSC-NEXT: ret @@ -1963,46 +1963,46 @@ define signext i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_sub_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 +; RV32IA-NEXT: lui a2, 16 +; RV32IA-NEXT: andi a3, a0, -4 +; RV32IA-NEXT: addi a2, a2, -1 ; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a1, a1, a2 +; RV32IA-NEXT: sll a2, a2, a0 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a2) -; RV32IA-NEXT: sub a5, a3, a1 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: sc.w a5, a5, (a2) +; RV32IA-NEXT: lr.w a4, (a3) +; RV32IA-NEXT: sub a5, a4, a1 +; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: and a5, a5, a2 +; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: sc.w a5, a5, (a3) ; RV32IA-NEXT: bnez a5, .LBB16_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a3, a0 +; RV32IA-NEXT: srl a0, a4, a0 ; RV32IA-NEXT: slli a0, a0, 16 ; 
RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; ; RV32I-ZALRSC-LABEL: atomicrmw_sub_i16_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) -; RV32I-ZALRSC-NEXT: sub a5, a3, a1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: lr.w a4, (a3) +; RV32I-ZALRSC-NEXT: sub a5, a4, a1 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB16_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: slli a0, a0, 16 ; RV32I-ZALRSC-NEXT: srai a0, a0, 16 ; RV32I-ZALRSC-NEXT: ret @@ -2021,46 +2021,46 @@ define signext i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_sub_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 +; RV64IA-NEXT: lui a2, 16 +; RV64IA-NEXT: andi a3, a0, -4 +; RV64IA-NEXT: addi a2, a2, -1 ; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addi a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, a0 -; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: and a1, a1, a2 +; RV64IA-NEXT: sllw a2, a2, a0 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a2) -; RV64IA-NEXT: sub a5, a3, a1 -; RV64IA-NEXT: xor 
a5, a3, a5 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: sc.w a5, a5, (a2) +; RV64IA-NEXT: lr.w a4, (a3) +; RV64IA-NEXT: sub a5, a4, a1 +; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: and a5, a5, a2 +; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: sc.w a5, a5, (a3) ; RV64IA-NEXT: bnez a5, .LBB16_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a3, a0 +; RV64IA-NEXT: srlw a0, a4, a0 ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret ; ; RV64I-ZALRSC-LABEL: atomicrmw_sub_i16_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) -; RV64I-ZALRSC-NEXT: sub a5, a3, a1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: lr.w a4, (a3) +; RV64I-ZALRSC-NEXT: sub a5, a4, a1 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB16_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: slli a0, a0, 48 ; RV64I-ZALRSC-NEXT: srai a0, a0, 48 ; RV64I-ZALRSC-NEXT: ret @@ -2083,39 +2083,39 @@ define signext i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_and_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, 
a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 -; RV32IA-NEXT: not a3, a4 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: or a1, a1, a3 -; RV32IA-NEXT: amoand.w a1, a1, (a2) -; RV32IA-NEXT: srl a0, a1, a0 +; RV32IA-NEXT: lui a2, 16 +; RV32IA-NEXT: slli a3, a0, 3 +; RV32IA-NEXT: addi a2, a2, -1 +; RV32IA-NEXT: sll a4, a2, a3 +; RV32IA-NEXT: and a1, a1, a2 +; RV32IA-NEXT: not a2, a4 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: or a1, a1, a2 +; RV32IA-NEXT: amoand.w a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a3 ; RV32IA-NEXT: slli a0, a0, 16 ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; ; RV32I-ZALRSC-LABEL: atomicrmw_and_i16_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 -; RV32I-ZALRSC-NEXT: not a3, a4 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 -; RV32I-ZALRSC-NEXT: or a1, a1, a3 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: slli a3, a0, 3 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 +; RV32I-ZALRSC-NEXT: sll a4, a2, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: not a2, a4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a3 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: or a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) -; RV32I-ZALRSC-NEXT: and a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: lr.w a2, (a0) +; RV32I-ZALRSC-NEXT: and a4, a2, a1 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB17_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a2, a3 ; RV32I-ZALRSC-NEXT: slli a0, a0, 16 ; RV32I-ZALRSC-NEXT: srai a0, a0, 16 ; RV32I-ZALRSC-NEXT: ret @@ -2134,39 +2134,39 @@ 
define signext i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_and_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addi a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, a0 -; RV64IA-NEXT: and a1, a1, a3 -; RV64IA-NEXT: not a3, a4 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: or a1, a1, a3 -; RV64IA-NEXT: amoand.w a1, a1, (a2) -; RV64IA-NEXT: srlw a0, a1, a0 +; RV64IA-NEXT: lui a2, 16 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: addi a2, a2, -1 +; RV64IA-NEXT: sllw a4, a2, a3 +; RV64IA-NEXT: and a1, a1, a2 +; RV64IA-NEXT: not a2, a4 +; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: or a1, a1, a2 +; RV64IA-NEXT: amoand.w a0, a1, (a0) +; RV64IA-NEXT: srlw a0, a0, a3 ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret ; ; RV64I-ZALRSC-LABEL: atomicrmw_and_i16_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 -; RV64I-ZALRSC-NEXT: not a3, a4 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 -; RV64I-ZALRSC-NEXT: or a1, a1, a3 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: slli a3, a0, 3 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 +; RV64I-ZALRSC-NEXT: sllw a4, a2, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: not a2, a4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a3 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: or a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) -; RV64I-ZALRSC-NEXT: and a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: lr.w a2, (a0) +; RV64I-ZALRSC-NEXT: and a4, a2, a1 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB17_1 ; RV64I-ZALRSC-NEXT: # 
%bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a2, a3 ; RV64I-ZALRSC-NEXT: slli a0, a0, 48 ; RV64I-ZALRSC-NEXT: srai a0, a0, 48 ; RV64I-ZALRSC-NEXT: ret @@ -2189,48 +2189,48 @@ define signext i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_nand_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 +; RV32IA-NEXT: lui a2, 16 +; RV32IA-NEXT: andi a3, a0, -4 +; RV32IA-NEXT: addi a2, a2, -1 ; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a1, a1, a2 +; RV32IA-NEXT: sll a2, a2, a0 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a2) -; RV32IA-NEXT: and a5, a3, a1 +; RV32IA-NEXT: lr.w a4, (a3) +; RV32IA-NEXT: and a5, a4, a1 ; RV32IA-NEXT: not a5, a5 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a3, a5 -; RV32IA-NEXT: sc.w a5, a5, (a2) +; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: and a5, a5, a2 +; RV32IA-NEXT: xor a5, a4, a5 +; RV32IA-NEXT: sc.w a5, a5, (a3) ; RV32IA-NEXT: bnez a5, .LBB18_1 ; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a3, a0 +; RV32IA-NEXT: srl a0, a4, a0 ; RV32IA-NEXT: slli a0, a0, 16 ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; ; RV32I-ZALRSC-LABEL: atomicrmw_nand_i16_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) 
-; RV32I-ZALRSC-NEXT: and a5, a3, a1 +; RV32I-ZALRSC-NEXT: lr.w a4, (a3) +; RV32I-ZALRSC-NEXT: and a5, a4, a1 ; RV32I-ZALRSC-NEXT: not a5, a5 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 -; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB18_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: slli a0, a0, 16 ; RV32I-ZALRSC-NEXT: srai a0, a0, 16 ; RV32I-ZALRSC-NEXT: ret @@ -2249,48 +2249,48 @@ define signext i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_nand_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 +; RV64IA-NEXT: lui a2, 16 +; RV64IA-NEXT: andi a3, a0, -4 +; RV64IA-NEXT: addi a2, a2, -1 ; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addi a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, a0 -; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: and a1, a1, a2 +; RV64IA-NEXT: sllw a2, a2, a0 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a2) -; RV64IA-NEXT: and a5, a3, a1 +; RV64IA-NEXT: lr.w a4, (a3) +; RV64IA-NEXT: and a5, a4, a1 ; RV64IA-NEXT: not a5, a5 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a3, a5 -; RV64IA-NEXT: sc.w a5, a5, (a2) +; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: and a5, a5, a2 +; RV64IA-NEXT: xor a5, a4, a5 +; RV64IA-NEXT: sc.w a5, a5, (a3) ; RV64IA-NEXT: bnez a5, .LBB18_1 ; RV64IA-NEXT: # %bb.2: -; RV64IA-NEXT: srlw a0, a3, a0 +; RV64IA-NEXT: srlw a0, a4, a0 ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret ; ; RV64I-ZALRSC-LABEL: atomicrmw_nand_i16_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, 
a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) -; RV64I-ZALRSC-NEXT: and a5, a3, a1 +; RV64I-ZALRSC-NEXT: lr.w a4, (a3) +; RV64I-ZALRSC-NEXT: and a5, a4, a1 ; RV64I-ZALRSC-NEXT: not a5, a5 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 -; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB18_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: slli a0, a0, 48 ; RV64I-ZALRSC-NEXT: srai a0, a0, 48 ; RV64I-ZALRSC-NEXT: ret @@ -2313,31 +2313,31 @@ define signext i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_or_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: slli a1, a1, 16 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: srli a1, a1, 16 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: amoor.w a1, a1, (a2) -; RV32IA-NEXT: srl a0, a1, a0 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: sll a1, a1, a2 +; RV32IA-NEXT: amoor.w a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: slli a0, a0, 16 ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; ; RV32I-ZALRSC-LABEL: atomicrmw_or_i16_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; 
RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: srli a1, a1, 16 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) ; RV32I-ZALRSC-NEXT: or a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB19_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: slli a0, a0, 16 ; RV32I-ZALRSC-NEXT: srai a0, a0, 16 ; RV32I-ZALRSC-NEXT: ret @@ -2356,31 +2356,31 @@ define signext i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_or_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: slli a1, a1, 48 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: srli a1, a1, 48 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: amoor.w a1, a1, (a2) -; RV64IA-NEXT: srlw a0, a1, a0 +; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: sllw a1, a1, a2 +; RV64IA-NEXT: amoor.w a0, a1, (a0) +; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret ; ; RV64I-ZALRSC-LABEL: atomicrmw_or_i16_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: srli a1, a1, 48 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) ; RV64I-ZALRSC-NEXT: or a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; 
RV64I-ZALRSC-NEXT: bnez a4, .LBB19_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: slli a0, a0, 48 ; RV64I-ZALRSC-NEXT: srai a0, a0, 48 ; RV64I-ZALRSC-NEXT: ret @@ -2403,31 +2403,31 @@ define signext i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_xor_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: slli a1, a1, 16 +; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: srli a1, a1, 16 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: amoxor.w a1, a1, (a2) -; RV32IA-NEXT: srl a0, a1, a0 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: sll a1, a1, a2 +; RV32IA-NEXT: amoxor.w a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: slli a0, a0, 16 ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; ; RV32I-ZALRSC-LABEL: atomicrmw_xor_i16_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: slli a2, a0, 3 ; RV32I-ZALRSC-NEXT: srli a1, a1, 16 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a1, a1, a2 ; RV32I-ZALRSC-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) ; RV32I-ZALRSC-NEXT: xor a4, a3, a1 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB20_1 ; RV32I-ZALRSC-NEXT: # %bb.2: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a2 ; RV32I-ZALRSC-NEXT: slli a0, a0, 16 ; RV32I-ZALRSC-NEXT: srai a0, a0, 16 ; RV32I-ZALRSC-NEXT: ret @@ -2446,31 +2446,31 @@ define signext i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_xor_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: slli 
a1, a1, 48 +; RV64IA-NEXT: slli a2, a0, 3 ; RV64IA-NEXT: srli a1, a1, 48 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: amoxor.w a1, a1, (a2) -; RV64IA-NEXT: srlw a0, a1, a0 +; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: sllw a1, a1, a2 +; RV64IA-NEXT: amoxor.w a0, a1, (a0) +; RV64IA-NEXT: srlw a0, a0, a2 ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret ; ; RV64I-ZALRSC-LABEL: atomicrmw_xor_i16_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: slli a2, a0, 3 ; RV64I-ZALRSC-NEXT: srli a1, a1, 48 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a2 ; RV64I-ZALRSC-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) ; RV64I-ZALRSC-NEXT: xor a4, a3, a1 -; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a2) +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB20_1 ; RV64I-ZALRSC-NEXT: # %bb.2: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a2 ; RV64I-ZALRSC-NEXT: slli a0, a0, 48 ; RV64I-ZALRSC-NEXT: srai a0, a0, 48 ; RV64I-ZALRSC-NEXT: ret @@ -2527,30 +2527,30 @@ define signext i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: slli a1, a1, 16 -; RV32IA-NEXT: li a4, 16 -; RV32IA-NEXT: andi a5, a0, 24 +; RV32IA-NEXT: andi a4, a0, 24 ; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a3, a3, a0 +; RV32IA-NEXT: slli a1, a1, 16 +; RV32IA-NEXT: srai a1, a1, 16 +; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: sub a4, a4, a5 +; RV32IA-NEXT: sub a5, a5, a4 ; RV32IA-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a2) -; RV32IA-NEXT: and a7, a5, a3 -; RV32IA-NEXT: mv a6, a5 
-; RV32IA-NEXT: sll a7, a7, a4 -; RV32IA-NEXT: sra a7, a7, a4 +; RV32IA-NEXT: lr.w a4, (a2) +; RV32IA-NEXT: and a7, a4, a3 +; RV32IA-NEXT: mv a6, a4 +; RV32IA-NEXT: sll a7, a7, a5 +; RV32IA-NEXT: sra a7, a7, a5 ; RV32IA-NEXT: bge a7, a1, .LBB21_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 -; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: xor a6, a4, a1 ; RV32IA-NEXT: and a6, a6, a3 -; RV32IA-NEXT: xor a6, a5, a6 +; RV32IA-NEXT: xor a6, a4, a6 ; RV32IA-NEXT: .LBB21_3: # in Loop: Header=BB21_1 Depth=1 ; RV32IA-NEXT: sc.w a6, a6, (a2) ; RV32IA-NEXT: bnez a6, .LBB21_1 ; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a5, a0 +; RV32IA-NEXT: srl a0, a4, a0 ; RV32IA-NEXT: slli a0, a0, 16 ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret @@ -2560,30 +2560,30 @@ define signext i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-ZALRSC-NEXT: andi a2, a0, -4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: slli a1, a1, 16 -; RV32I-ZALRSC-NEXT: li a4, 16 -; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 ; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: srai a1, a1, 16 ; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a5, 16 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 -; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: sub a5, a5, a4 ; RV32I-ZALRSC-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a5, (a2) -; RV32I-ZALRSC-NEXT: and a7, a5, a3 -; RV32I-ZALRSC-NEXT: mv a6, a5 -; RV32I-ZALRSC-NEXT: sll a7, a7, a4 -; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a7, a4, a3 +; RV32I-ZALRSC-NEXT: mv a6, a4 +; RV32I-ZALRSC-NEXT: sll a7, a7, a5 +; RV32I-ZALRSC-NEXT: sra a7, a7, a5 ; RV32I-ZALRSC-NEXT: bge a7, a1, .LBB21_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; 
RV32I-ZALRSC-NEXT: xor a6, a4, a1 ; RV32I-ZALRSC-NEXT: and a6, a6, a3 -; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: xor a6, a4, a6 ; RV32I-ZALRSC-NEXT: .LBB21_3: # in Loop: Header=BB21_1 Depth=1 ; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) ; RV32I-ZALRSC-NEXT: bnez a6, .LBB21_1 ; RV32I-ZALRSC-NEXT: # %bb.4: -; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: slli a0, a0, 16 ; RV32I-ZALRSC-NEXT: srai a0, a0, 16 ; RV32I-ZALRSC-NEXT: ret @@ -2636,30 +2636,30 @@ define signext i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: slli a1, a1, 48 -; RV64IA-NEXT: li a4, 48 -; RV64IA-NEXT: andi a5, a0, 24 +; RV64IA-NEXT: andi a4, a0, 24 ; RV64IA-NEXT: addi a3, a3, -1 -; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a3, a3, a0 +; RV64IA-NEXT: slli a1, a1, 48 +; RV64IA-NEXT: srai a1, a1, 48 +; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: sub a4, a4, a5 +; RV64IA-NEXT: sub a5, a5, a4 ; RV64IA-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a2) -; RV64IA-NEXT: and a7, a5, a3 -; RV64IA-NEXT: mv a6, a5 -; RV64IA-NEXT: sll a7, a7, a4 -; RV64IA-NEXT: sra a7, a7, a4 +; RV64IA-NEXT: lr.w a4, (a2) +; RV64IA-NEXT: and a7, a4, a3 +; RV64IA-NEXT: mv a6, a4 +; RV64IA-NEXT: sll a7, a7, a5 +; RV64IA-NEXT: sra a7, a7, a5 ; RV64IA-NEXT: bge a7, a1, .LBB21_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 -; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: xor a6, a4, a1 ; RV64IA-NEXT: and a6, a6, a3 -; RV64IA-NEXT: xor a6, a5, a6 +; RV64IA-NEXT: xor a6, a4, a6 ; RV64IA-NEXT: .LBB21_3: # in Loop: Header=BB21_1 Depth=1 ; RV64IA-NEXT: sc.w a6, a6, (a2) ; RV64IA-NEXT: bnez a6, .LBB21_1 ; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a5, a0 +; RV64IA-NEXT: srlw a0, a4, a0 ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret @@ -2669,30 +2669,30 @@ 
define signext i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-ZALRSC-NEXT: andi a2, a0, -4 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: slli a1, a1, 48 -; RV64I-ZALRSC-NEXT: li a4, 48 -; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 ; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: srai a1, a1, 48 ; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a5, 48 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 -; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: sub a5, a5, a4 ; RV64I-ZALRSC-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a5, (a2) -; RV64I-ZALRSC-NEXT: and a7, a5, a3 -; RV64I-ZALRSC-NEXT: mv a6, a5 -; RV64I-ZALRSC-NEXT: sll a7, a7, a4 -; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a7, a4, a3 +; RV64I-ZALRSC-NEXT: mv a6, a4 +; RV64I-ZALRSC-NEXT: sll a7, a7, a5 +; RV64I-ZALRSC-NEXT: sra a7, a7, a5 ; RV64I-ZALRSC-NEXT: bge a7, a1, .LBB21_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: xor a6, a4, a1 ; RV64I-ZALRSC-NEXT: and a6, a6, a3 -; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: xor a6, a4, a6 ; RV64I-ZALRSC-NEXT: .LBB21_3: # in Loop: Header=BB21_1 Depth=1 ; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) ; RV64I-ZALRSC-NEXT: bnez a6, .LBB21_1 ; RV64I-ZALRSC-NEXT: # %bb.4: -; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: slli a0, a0, 48 ; RV64I-ZALRSC-NEXT: srai a0, a0, 48 ; RV64I-ZALRSC-NEXT: ret @@ -2749,30 +2749,30 @@ define signext i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: slli a1, a1, 16 -; RV32IA-NEXT: li a4, 16 -; RV32IA-NEXT: andi a5, a0, 24 +; 
RV32IA-NEXT: andi a4, a0, 24 ; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a3, a3, a0 +; RV32IA-NEXT: slli a1, a1, 16 +; RV32IA-NEXT: srai a1, a1, 16 +; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: sub a4, a4, a5 +; RV32IA-NEXT: sub a5, a5, a4 ; RV32IA-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a2) -; RV32IA-NEXT: and a7, a5, a3 -; RV32IA-NEXT: mv a6, a5 -; RV32IA-NEXT: sll a7, a7, a4 -; RV32IA-NEXT: sra a7, a7, a4 +; RV32IA-NEXT: lr.w a4, (a2) +; RV32IA-NEXT: and a7, a4, a3 +; RV32IA-NEXT: mv a6, a4 +; RV32IA-NEXT: sll a7, a7, a5 +; RV32IA-NEXT: sra a7, a7, a5 ; RV32IA-NEXT: bge a1, a7, .LBB22_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 -; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: xor a6, a4, a1 ; RV32IA-NEXT: and a6, a6, a3 -; RV32IA-NEXT: xor a6, a5, a6 +; RV32IA-NEXT: xor a6, a4, a6 ; RV32IA-NEXT: .LBB22_3: # in Loop: Header=BB22_1 Depth=1 ; RV32IA-NEXT: sc.w a6, a6, (a2) ; RV32IA-NEXT: bnez a6, .LBB22_1 ; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a5, a0 +; RV32IA-NEXT: srl a0, a4, a0 ; RV32IA-NEXT: slli a0, a0, 16 ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret @@ -2782,30 +2782,30 @@ define signext i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-ZALRSC-NEXT: andi a2, a0, -4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 ; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: slli a1, a1, 16 -; RV32I-ZALRSC-NEXT: li a4, 16 -; RV32I-ZALRSC-NEXT: andi a5, a0, 24 +; RV32I-ZALRSC-NEXT: andi a4, a0, 24 ; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: srai a1, a1, 16 ; RV32I-ZALRSC-NEXT: sll a3, a3, a0 +; RV32I-ZALRSC-NEXT: slli a1, a1, 16 +; RV32I-ZALRSC-NEXT: srai a1, a1, 16 +; RV32I-ZALRSC-NEXT: li a5, 16 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 -; RV32I-ZALRSC-NEXT: sub a4, a4, a5 +; RV32I-ZALRSC-NEXT: sub a5, a5, a4 ; RV32I-ZALRSC-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a5, (a2) -; 
RV32I-ZALRSC-NEXT: and a7, a5, a3 -; RV32I-ZALRSC-NEXT: mv a6, a5 -; RV32I-ZALRSC-NEXT: sll a7, a7, a4 -; RV32I-ZALRSC-NEXT: sra a7, a7, a4 +; RV32I-ZALRSC-NEXT: lr.w a4, (a2) +; RV32I-ZALRSC-NEXT: and a7, a4, a3 +; RV32I-ZALRSC-NEXT: mv a6, a4 +; RV32I-ZALRSC-NEXT: sll a7, a7, a5 +; RV32I-ZALRSC-NEXT: sra a7, a7, a5 ; RV32I-ZALRSC-NEXT: bge a1, a7, .LBB22_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a6, a5, a1 +; RV32I-ZALRSC-NEXT: xor a6, a4, a1 ; RV32I-ZALRSC-NEXT: and a6, a6, a3 -; RV32I-ZALRSC-NEXT: xor a6, a5, a6 +; RV32I-ZALRSC-NEXT: xor a6, a4, a6 ; RV32I-ZALRSC-NEXT: .LBB22_3: # in Loop: Header=BB22_1 Depth=1 ; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a2) ; RV32I-ZALRSC-NEXT: bnez a6, .LBB22_1 ; RV32I-ZALRSC-NEXT: # %bb.4: -; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: slli a0, a0, 16 ; RV32I-ZALRSC-NEXT: srai a0, a0, 16 ; RV32I-ZALRSC-NEXT: ret @@ -2858,30 +2858,30 @@ define signext i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slli a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: slli a1, a1, 48 -; RV64IA-NEXT: li a4, 48 -; RV64IA-NEXT: andi a5, a0, 24 +; RV64IA-NEXT: andi a4, a0, 24 ; RV64IA-NEXT: addi a3, a3, -1 -; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a3, a3, a0 +; RV64IA-NEXT: slli a1, a1, 48 +; RV64IA-NEXT: srai a1, a1, 48 +; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: sub a4, a4, a5 +; RV64IA-NEXT: sub a5, a5, a4 ; RV64IA-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a2) -; RV64IA-NEXT: and a7, a5, a3 -; RV64IA-NEXT: mv a6, a5 -; RV64IA-NEXT: sll a7, a7, a4 -; RV64IA-NEXT: sra a7, a7, a4 +; RV64IA-NEXT: lr.w a4, (a2) +; RV64IA-NEXT: and a7, a4, a3 +; RV64IA-NEXT: mv a6, a4 +; RV64IA-NEXT: sll a7, a7, a5 +; RV64IA-NEXT: sra a7, a7, a5 ; RV64IA-NEXT: bge a1, a7, .LBB22_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 -; 
RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: xor a6, a4, a1 ; RV64IA-NEXT: and a6, a6, a3 -; RV64IA-NEXT: xor a6, a5, a6 +; RV64IA-NEXT: xor a6, a4, a6 ; RV64IA-NEXT: .LBB22_3: # in Loop: Header=BB22_1 Depth=1 ; RV64IA-NEXT: sc.w a6, a6, (a2) ; RV64IA-NEXT: bnez a6, .LBB22_1 ; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a5, a0 +; RV64IA-NEXT: srlw a0, a4, a0 ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret @@ -2891,30 +2891,30 @@ define signext i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-ZALRSC-NEXT: andi a2, a0, -4 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 ; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: slli a1, a1, 48 -; RV64I-ZALRSC-NEXT: li a4, 48 -; RV64I-ZALRSC-NEXT: andi a5, a0, 24 +; RV64I-ZALRSC-NEXT: andi a4, a0, 24 ; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: srai a1, a1, 48 ; RV64I-ZALRSC-NEXT: sllw a3, a3, a0 +; RV64I-ZALRSC-NEXT: slli a1, a1, 48 +; RV64I-ZALRSC-NEXT: srai a1, a1, 48 +; RV64I-ZALRSC-NEXT: li a5, 48 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 -; RV64I-ZALRSC-NEXT: sub a4, a4, a5 +; RV64I-ZALRSC-NEXT: sub a5, a5, a4 ; RV64I-ZALRSC-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a5, (a2) -; RV64I-ZALRSC-NEXT: and a7, a5, a3 -; RV64I-ZALRSC-NEXT: mv a6, a5 -; RV64I-ZALRSC-NEXT: sll a7, a7, a4 -; RV64I-ZALRSC-NEXT: sra a7, a7, a4 +; RV64I-ZALRSC-NEXT: lr.w a4, (a2) +; RV64I-ZALRSC-NEXT: and a7, a4, a3 +; RV64I-ZALRSC-NEXT: mv a6, a4 +; RV64I-ZALRSC-NEXT: sll a7, a7, a5 +; RV64I-ZALRSC-NEXT: sra a7, a7, a5 ; RV64I-ZALRSC-NEXT: bge a1, a7, .LBB22_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a6, a5, a1 +; RV64I-ZALRSC-NEXT: xor a6, a4, a1 ; RV64I-ZALRSC-NEXT: and a6, a6, a3 -; RV64I-ZALRSC-NEXT: xor a6, a5, a6 +; RV64I-ZALRSC-NEXT: xor a6, a4, a6 ; RV64I-ZALRSC-NEXT: .LBB22_3: # in Loop: Header=BB22_1 Depth=1 ; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a2) ; RV64I-ZALRSC-NEXT: bnez a6, .LBB22_1 ; 
RV64I-ZALRSC-NEXT: # %bb.4: -; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: slli a0, a0, 48 ; RV64I-ZALRSC-NEXT: srai a0, a0, 48 ; RV64I-ZALRSC-NEXT: ret @@ -2931,34 +2931,34 @@ define signext i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lhu a3, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: j .LBB23_2 ; RV32I-NEXT: .LBB23_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB23_2 Depth=1 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: sh a3, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: lh a3, 10(sp) ; RV32I-NEXT: bnez a0, .LBB23_4 ; RV32I-NEXT: .LBB23_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: and a0, a3, s2 +; RV32I-NEXT: mv a2, a3 ; RV32I-NEXT: bltu s3, a0, .LBB23_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB23_2 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB23_1 ; RV32I-NEXT: .LBB23_4: # %atomicrmw.end -; RV32I-NEXT: slli a0, a1, 16 +; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -2970,54 +2970,54 @@ define signext i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_umax_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 
+; RV32IA-NEXT: lui a2, 16 +; RV32IA-NEXT: andi a3, a0, -4 +; RV32IA-NEXT: addi a2, a2, -1 ; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a1, a1, a2 +; RV32IA-NEXT: sll a2, a2, a0 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a2) -; RV32IA-NEXT: and a6, a3, a4 -; RV32IA-NEXT: mv a5, a3 +; RV32IA-NEXT: lr.w a4, (a3) +; RV32IA-NEXT: and a6, a4, a2 +; RV32IA-NEXT: mv a5, a4 ; RV32IA-NEXT: bgeu a6, a1, .LBB23_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 -; RV32IA-NEXT: xor a5, a3, a1 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: xor a5, a4, a1 +; RV32IA-NEXT: and a5, a5, a2 +; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB23_3: # in Loop: Header=BB23_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a2) +; RV32IA-NEXT: sc.w a5, a5, (a3) ; RV32IA-NEXT: bnez a5, .LBB23_1 ; RV32IA-NEXT: # %bb.4: -; RV32IA-NEXT: srl a0, a3, a0 +; RV32IA-NEXT: srl a0, a4, a0 ; RV32IA-NEXT: slli a0, a0, 16 ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; ; RV32I-ZALRSC-LABEL: atomicrmw_umax_i16_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) -; RV32I-ZALRSC-NEXT: and a6, a3, a4 -; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: lr.w a4, (a3) +; RV32I-ZALRSC-NEXT: and a6, a4, a2 +; RV32I-ZALRSC-NEXT: mv a5, a4 ; RV32I-ZALRSC-NEXT: bgeu a6, 
a1, .LBB23_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a1 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 ; RV32I-ZALRSC-NEXT: .LBB23_3: # in Loop: Header=BB23_1 Depth=1 -; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB23_1 ; RV32I-ZALRSC-NEXT: # %bb.4: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: slli a0, a0, 16 ; RV32I-ZALRSC-NEXT: srai a0, a0, 16 ; RV32I-ZALRSC-NEXT: ret @@ -3030,34 +3030,34 @@ define signext i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lhu a3, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addi s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: j .LBB23_2 ; RV64I-NEXT: .LBB23_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB23_2 Depth=1 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: sh a3, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: lh a3, 6(sp) ; RV64I-NEXT: bnez a0, .LBB23_4 ; RV64I-NEXT: .LBB23_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: and a0, a3, s2 +; RV64I-NEXT: mv a2, a3 ; RV64I-NEXT: bltu s3, a0, .LBB23_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB23_2 Depth=1 -; RV64I-NEXT: mv a2, s0 +; 
RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB23_1 ; RV64I-NEXT: .LBB23_4: # %atomicrmw.end -; RV64I-NEXT: slli a0, a1, 48 +; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -3069,54 +3069,54 @@ define signext i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umax_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 +; RV64IA-NEXT: lui a2, 16 +; RV64IA-NEXT: andi a3, a0, -4 +; RV64IA-NEXT: addi a2, a2, -1 ; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addi a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, a0 -; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: and a1, a1, a2 +; RV64IA-NEXT: sllw a2, a2, a0 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a2) -; RV64IA-NEXT: and a6, a3, a4 -; RV64IA-NEXT: mv a5, a3 +; RV64IA-NEXT: lr.w a4, (a3) +; RV64IA-NEXT: and a6, a4, a2 +; RV64IA-NEXT: mv a5, a4 ; RV64IA-NEXT: bgeu a6, a1, .LBB23_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 -; RV64IA-NEXT: xor a5, a3, a1 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a3, a5 +; RV64IA-NEXT: xor a5, a4, a1 +; RV64IA-NEXT: and a5, a5, a2 +; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB23_3: # in Loop: Header=BB23_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a2) +; RV64IA-NEXT: sc.w a5, a5, (a3) ; RV64IA-NEXT: bnez a5, .LBB23_1 ; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a3, a0 +; RV64IA-NEXT: srlw a0, a4, a0 ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret ; ; RV64I-ZALRSC-LABEL: atomicrmw_umax_i16_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; 
RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) -; RV64I-ZALRSC-NEXT: and a6, a3, a4 -; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: lr.w a4, (a3) +; RV64I-ZALRSC-NEXT: and a6, a4, a2 +; RV64I-ZALRSC-NEXT: mv a5, a4 ; RV64I-ZALRSC-NEXT: bgeu a6, a1, .LBB23_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a1 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 ; RV64I-ZALRSC-NEXT: .LBB23_3: # in Loop: Header=BB23_1 Depth=1 -; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB23_1 ; RV64I-ZALRSC-NEXT: # %bb.4: -; RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: slli a0, a0, 48 ; RV64I-ZALRSC-NEXT: srai a0, a0, 48 ; RV64I-ZALRSC-NEXT: ret @@ -3133,34 +3133,34 @@ define signext i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lhu a3, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: j .LBB24_2 ; RV32I-NEXT: .LBB24_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB24_2 Depth=1 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: sh a3, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 
; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: lh a3, 10(sp) ; RV32I-NEXT: bnez a0, .LBB24_4 ; RV32I-NEXT: .LBB24_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: and a0, a3, s2 +; RV32I-NEXT: mv a2, a3 ; RV32I-NEXT: bgeu s3, a0, .LBB24_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB24_2 Depth=1 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB24_1 ; RV32I-NEXT: .LBB24_4: # %atomicrmw.end -; RV32I-NEXT: slli a0, a1, 16 +; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -3172,54 +3172,54 @@ define signext i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_umin_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a0, -4 +; RV32IA-NEXT: lui a2, 16 +; RV32IA-NEXT: andi a3, a0, -4 +; RV32IA-NEXT: addi a2, a2, -1 ; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: sll a4, a3, a0 -; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a1, a1, a2 +; RV32IA-NEXT: sll a2, a2, a0 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a2) -; RV32IA-NEXT: and a6, a3, a4 -; RV32IA-NEXT: mv a5, a3 +; RV32IA-NEXT: lr.w a4, (a3) +; RV32IA-NEXT: and a6, a4, a2 +; RV32IA-NEXT: mv a5, a4 ; RV32IA-NEXT: bgeu a1, a6, .LBB24_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 -; RV32IA-NEXT: xor a5, a3, a1 -; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: xor a5, a4, a1 +; RV32IA-NEXT: and a5, a5, a2 +; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB24_3: # in Loop: Header=BB24_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a2) +; RV32IA-NEXT: sc.w a5, a5, (a3) ; RV32IA-NEXT: bnez a5, .LBB24_1 ; RV32IA-NEXT: # 
%bb.4: -; RV32IA-NEXT: srl a0, a3, a0 +; RV32IA-NEXT: srl a0, a4, a0 ; RV32IA-NEXT: slli a0, a0, 16 ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; ; RV32I-ZALRSC-LABEL: atomicrmw_umin_i16_monotonic: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a2, a0, -4 +; RV32I-ZALRSC-NEXT: lui a2, 16 +; RV32I-ZALRSC-NEXT: andi a3, a0, -4 +; RV32I-ZALRSC-NEXT: addi a2, a2, -1 ; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a3, 16 -; RV32I-ZALRSC-NEXT: addi a3, a3, -1 -; RV32I-ZALRSC-NEXT: sll a4, a3, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a1, a1, a2 +; RV32I-ZALRSC-NEXT: sll a2, a2, a0 ; RV32I-ZALRSC-NEXT: sll a1, a1, a0 ; RV32I-ZALRSC-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a3, (a2) -; RV32I-ZALRSC-NEXT: and a6, a3, a4 -; RV32I-ZALRSC-NEXT: mv a5, a3 +; RV32I-ZALRSC-NEXT: lr.w a4, (a3) +; RV32I-ZALRSC-NEXT: and a6, a4, a2 +; RV32I-ZALRSC-NEXT: mv a5, a4 ; RV32I-ZALRSC-NEXT: bgeu a1, a6, .LBB24_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a5, a3, a1 -; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: xor a5, a4, a1 +; RV32I-ZALRSC-NEXT: and a5, a5, a2 +; RV32I-ZALRSC-NEXT: xor a5, a4, a5 ; RV32I-ZALRSC-NEXT: .LBB24_3: # in Loop: Header=BB24_1 Depth=1 -; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB24_1 ; RV32I-ZALRSC-NEXT: # %bb.4: -; RV32I-ZALRSC-NEXT: srl a0, a3, a0 +; RV32I-ZALRSC-NEXT: srl a0, a4, a0 ; RV32I-ZALRSC-NEXT: slli a0, a0, 16 ; RV32I-ZALRSC-NEXT: srai a0, a0, 16 ; RV32I-ZALRSC-NEXT: ret @@ -3232,34 +3232,34 @@ define signext i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; 
RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lhu a3, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addi s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: j .LBB24_2 ; RV64I-NEXT: .LBB24_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB24_2 Depth=1 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: sh a3, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: lh a3, 6(sp) ; RV64I-NEXT: bnez a0, .LBB24_4 ; RV64I-NEXT: .LBB24_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: and a0, a3, s2 +; RV64I-NEXT: mv a2, a3 ; RV64I-NEXT: bgeu s3, a0, .LBB24_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB24_2 Depth=1 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB24_1 ; RV64I-NEXT: .LBB24_4: # %atomicrmw.end -; RV64I-NEXT: slli a0, a1, 48 +; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -3271,54 +3271,54 @@ define signext i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umin_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, -4 +; RV64IA-NEXT: lui a2, 16 +; RV64IA-NEXT: andi a3, a0, -4 +; RV64IA-NEXT: addi a2, a2, -1 ; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: addi a3, a3, -1 -; RV64IA-NEXT: sllw a4, a3, a0 -; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: and a1, a1, a2 +; RV64IA-NEXT: sllw a2, a2, a0 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a2) -; RV64IA-NEXT: and a6, a3, a4 -; RV64IA-NEXT: mv a5, a3 +; RV64IA-NEXT: lr.w a4, (a3) +; RV64IA-NEXT: and a6, 
a4, a2 +; RV64IA-NEXT: mv a5, a4 ; RV64IA-NEXT: bgeu a1, a6, .LBB24_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 -; RV64IA-NEXT: xor a5, a3, a1 -; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a3, a5 +; RV64IA-NEXT: xor a5, a4, a1 +; RV64IA-NEXT: and a5, a5, a2 +; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB24_3: # in Loop: Header=BB24_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a2) +; RV64IA-NEXT: sc.w a5, a5, (a3) ; RV64IA-NEXT: bnez a5, .LBB24_1 ; RV64IA-NEXT: # %bb.4: -; RV64IA-NEXT: srlw a0, a3, a0 +; RV64IA-NEXT: srlw a0, a4, a0 ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret ; ; RV64I-ZALRSC-LABEL: atomicrmw_umin_i16_monotonic: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a2, a0, -4 +; RV64I-ZALRSC-NEXT: lui a2, 16 +; RV64I-ZALRSC-NEXT: andi a3, a0, -4 +; RV64I-ZALRSC-NEXT: addi a2, a2, -1 ; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a3, 16 -; RV64I-ZALRSC-NEXT: addi a3, a3, -1 -; RV64I-ZALRSC-NEXT: sllw a4, a3, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a1, a1, a2 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 ; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 ; RV64I-ZALRSC-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a3, (a2) -; RV64I-ZALRSC-NEXT: and a6, a3, a4 -; RV64I-ZALRSC-NEXT: mv a5, a3 +; RV64I-ZALRSC-NEXT: lr.w a4, (a3) +; RV64I-ZALRSC-NEXT: and a6, a4, a2 +; RV64I-ZALRSC-NEXT: mv a5, a4 ; RV64I-ZALRSC-NEXT: bgeu a1, a6, .LBB24_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a5, a3, a1 -; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: xor a5, a4, a1 +; RV64I-ZALRSC-NEXT: and a5, a5, a2 +; RV64I-ZALRSC-NEXT: xor a5, a4, a5 ; RV64I-ZALRSC-NEXT: .LBB24_3: # in Loop: Header=BB24_1 Depth=1 -; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a2) +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a3) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB24_1 ; RV64I-ZALRSC-NEXT: # %bb.4: -; 
RV64I-ZALRSC-NEXT: srlw a0, a3, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 ; RV64I-ZALRSC-NEXT: slli a0, a0, 48 ; RV64I-ZALRSC-NEXT: srai a0, a0, 48 ; RV64I-ZALRSC-NEXT: ret @@ -5599,52 +5599,52 @@ define signext i8 @cmpxchg_i8_monotonic_monotonic_val0(ptr %ptr, i8 signext %cmp ; ; RV32IA-LABEL: cmpxchg_i8_monotonic_monotonic_val0: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 +; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: li a4, 255 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: sll a4, a4, a3 ; RV32IA-NEXT: zext.b a1, a1 ; RV32IA-NEXT: zext.b a2, a2 -; RV32IA-NEXT: sll a4, a4, a0 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: sll a2, a2, a0 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a3) +; RV32IA-NEXT: lr.w a5, (a0) ; RV32IA-NEXT: and a6, a5, a4 ; RV32IA-NEXT: bne a6, a1, .LBB47_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 ; RV32IA-NEXT: xor a6, a5, a2 ; RV32IA-NEXT: and a6, a6, a4 ; RV32IA-NEXT: xor a6, a5, a6 -; RV32IA-NEXT: sc.w a6, a6, (a3) +; RV32IA-NEXT: sc.w a6, a6, (a0) ; RV32IA-NEXT: bnez a6, .LBB47_1 ; RV32IA-NEXT: .LBB47_3: -; RV32IA-NEXT: srl a0, a5, a0 +; RV32IA-NEXT: srl a0, a5, a3 ; RV32IA-NEXT: slli a0, a0, 24 ; RV32IA-NEXT: srai a0, a0, 24 ; RV32IA-NEXT: ret ; ; RV32I-ZALRSC-LABEL: cmpxchg_i8_monotonic_monotonic_val0: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a3, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a3, a0, 3 ; RV32I-ZALRSC-NEXT: li a4, 255 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a4, a4, a3 ; RV32I-ZALRSC-NEXT: zext.b a1, a1 ; RV32I-ZALRSC-NEXT: zext.b a2, a2 -; RV32I-ZALRSC-NEXT: sll a4, a4, a0 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 -; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a2, a2, a3 ; RV32I-ZALRSC-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: 
lr.w a5, (a3) +; RV32I-ZALRSC-NEXT: lr.w a5, (a0) ; RV32I-ZALRSC-NEXT: and a6, a5, a4 ; RV32I-ZALRSC-NEXT: bne a6, a1, .LBB47_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 ; RV32I-ZALRSC-NEXT: xor a6, a5, a2 ; RV32I-ZALRSC-NEXT: and a6, a6, a4 ; RV32I-ZALRSC-NEXT: xor a6, a5, a6 -; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a3) +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a0) ; RV32I-ZALRSC-NEXT: bnez a6, .LBB47_1 ; RV32I-ZALRSC-NEXT: .LBB47_3: -; RV32I-ZALRSC-NEXT: srl a0, a5, a0 +; RV32I-ZALRSC-NEXT: srl a0, a5, a3 ; RV32I-ZALRSC-NEXT: slli a0, a0, 24 ; RV32I-ZALRSC-NEXT: srai a0, a0, 24 ; RV32I-ZALRSC-NEXT: ret @@ -5665,52 +5665,52 @@ define signext i8 @cmpxchg_i8_monotonic_monotonic_val0(ptr %ptr, i8 signext %cmp ; ; RV64IA-LABEL: cmpxchg_i8_monotonic_monotonic_val0: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slli a3, a0, 3 ; RV64IA-NEXT: li a4, 255 +; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: sllw a4, a4, a3 ; RV64IA-NEXT: zext.b a1, a1 ; RV64IA-NEXT: zext.b a2, a2 -; RV64IA-NEXT: sllw a4, a4, a0 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: sllw a2, a2, a0 +; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a3) +; RV64IA-NEXT: lr.w a5, (a0) ; RV64IA-NEXT: and a6, a5, a4 ; RV64IA-NEXT: bne a6, a1, .LBB47_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 ; RV64IA-NEXT: xor a6, a5, a2 ; RV64IA-NEXT: and a6, a6, a4 ; RV64IA-NEXT: xor a6, a5, a6 -; RV64IA-NEXT: sc.w a6, a6, (a3) +; RV64IA-NEXT: sc.w a6, a6, (a0) ; RV64IA-NEXT: bnez a6, .LBB47_1 ; RV64IA-NEXT: .LBB47_3: -; RV64IA-NEXT: srlw a0, a5, a0 +; RV64IA-NEXT: srlw a0, a5, a3 ; RV64IA-NEXT: slli a0, a0, 56 ; RV64IA-NEXT: srai a0, a0, 56 ; RV64IA-NEXT: ret ; ; RV64I-ZALRSC-LABEL: cmpxchg_i8_monotonic_monotonic_val0: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a3, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a3, 
a0, 3 ; RV64I-ZALRSC-NEXT: li a4, 255 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a4, a4, a3 ; RV64I-ZALRSC-NEXT: zext.b a1, a1 ; RV64I-ZALRSC-NEXT: zext.b a2, a2 -; RV64I-ZALRSC-NEXT: sllw a4, a4, a0 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 -; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a3 ; RV64I-ZALRSC-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a5, (a3) +; RV64I-ZALRSC-NEXT: lr.w a5, (a0) ; RV64I-ZALRSC-NEXT: and a6, a5, a4 ; RV64I-ZALRSC-NEXT: bne a6, a1, .LBB47_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 ; RV64I-ZALRSC-NEXT: xor a6, a5, a2 ; RV64I-ZALRSC-NEXT: and a6, a6, a4 ; RV64I-ZALRSC-NEXT: xor a6, a5, a6 -; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a3) +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a0) ; RV64I-ZALRSC-NEXT: bnez a6, .LBB47_1 ; RV64I-ZALRSC-NEXT: .LBB47_3: -; RV64I-ZALRSC-NEXT: srlw a0, a5, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a5, a3 ; RV64I-ZALRSC-NEXT: slli a0, a0, 56 ; RV64I-ZALRSC-NEXT: srai a0, a0, 56 ; RV64I-ZALRSC-NEXT: ret @@ -5735,53 +5735,53 @@ define i1 @cmpxchg_i8_monotonic_monotonic_val1(ptr %ptr, i8 signext %cmp, i8 sig ; ; RV32IA-LABEL: cmpxchg_i8_monotonic_monotonic_val1: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 +; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: li a4, 255 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: sll a4, a4, a3 ; RV32IA-NEXT: zext.b a1, a1 ; RV32IA-NEXT: zext.b a2, a2 -; RV32IA-NEXT: sll a4, a4, a0 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: sll a0, a2, a0 +; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a2, (a3) -; RV32IA-NEXT: and a5, a2, a4 +; RV32IA-NEXT: lr.w a3, (a0) +; RV32IA-NEXT: and a5, a3, a4 ; RV32IA-NEXT: bne a5, a1, .LBB48_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 -; RV32IA-NEXT: xor a5, a2, a0 +; RV32IA-NEXT: xor a5, a3, a2 
; RV32IA-NEXT: and a5, a5, a4 -; RV32IA-NEXT: xor a5, a2, a5 -; RV32IA-NEXT: sc.w a5, a5, (a3) +; RV32IA-NEXT: xor a5, a3, a5 +; RV32IA-NEXT: sc.w a5, a5, (a0) ; RV32IA-NEXT: bnez a5, .LBB48_1 ; RV32IA-NEXT: .LBB48_3: -; RV32IA-NEXT: and a2, a2, a4 -; RV32IA-NEXT: xor a1, a1, a2 +; RV32IA-NEXT: and a3, a3, a4 +; RV32IA-NEXT: xor a1, a1, a3 ; RV32IA-NEXT: seqz a0, a1 ; RV32IA-NEXT: ret ; ; RV32I-ZALRSC-LABEL: cmpxchg_i8_monotonic_monotonic_val1: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a3, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 +; RV32I-ZALRSC-NEXT: slli a3, a0, 3 ; RV32I-ZALRSC-NEXT: li a4, 255 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a4, a4, a3 ; RV32I-ZALRSC-NEXT: zext.b a1, a1 ; RV32I-ZALRSC-NEXT: zext.b a2, a2 -; RV32I-ZALRSC-NEXT: sll a4, a4, a0 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 -; RV32I-ZALRSC-NEXT: sll a0, a2, a0 +; RV32I-ZALRSC-NEXT: sll a1, a1, a3 +; RV32I-ZALRSC-NEXT: sll a2, a2, a3 ; RV32I-ZALRSC-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a2, (a3) -; RV32I-ZALRSC-NEXT: and a5, a2, a4 +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) +; RV32I-ZALRSC-NEXT: and a5, a3, a4 ; RV32I-ZALRSC-NEXT: bne a5, a1, .LBB48_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a5, a2, a0 +; RV32I-ZALRSC-NEXT: xor a5, a3, a2 ; RV32I-ZALRSC-NEXT: and a5, a5, a4 -; RV32I-ZALRSC-NEXT: xor a5, a2, a5 -; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a3) +; RV32I-ZALRSC-NEXT: xor a5, a3, a5 +; RV32I-ZALRSC-NEXT: sc.w a5, a5, (a0) ; RV32I-ZALRSC-NEXT: bnez a5, .LBB48_1 ; RV32I-ZALRSC-NEXT: .LBB48_3: -; RV32I-ZALRSC-NEXT: and a2, a2, a4 -; RV32I-ZALRSC-NEXT: xor a1, a1, a2 +; RV32I-ZALRSC-NEXT: and a3, a3, a4 +; RV32I-ZALRSC-NEXT: xor a1, a1, a3 ; RV32I-ZALRSC-NEXT: seqz a0, a1 ; RV32I-ZALRSC-NEXT: ret ; @@ -5800,53 +5800,53 @@ define i1 @cmpxchg_i8_monotonic_monotonic_val1(ptr %ptr, i8 signext %cmp, i8 sig ; ; RV64IA-LABEL: cmpxchg_i8_monotonic_monotonic_val1: ; RV64IA: # %bb.0: -; 
RV64IA-NEXT: andi a3, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: slli a3, a0, 3 ; RV64IA-NEXT: li a4, 255 +; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: sllw a4, a4, a3 ; RV64IA-NEXT: zext.b a1, a1 ; RV64IA-NEXT: zext.b a2, a2 -; RV64IA-NEXT: sllw a4, a4, a0 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: sllw a0, a2, a0 +; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a2, (a3) -; RV64IA-NEXT: and a5, a2, a4 +; RV64IA-NEXT: lr.w a3, (a0) +; RV64IA-NEXT: and a5, a3, a4 ; RV64IA-NEXT: bne a5, a1, .LBB48_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 -; RV64IA-NEXT: xor a5, a2, a0 +; RV64IA-NEXT: xor a5, a3, a2 ; RV64IA-NEXT: and a5, a5, a4 -; RV64IA-NEXT: xor a5, a2, a5 -; RV64IA-NEXT: sc.w a5, a5, (a3) +; RV64IA-NEXT: xor a5, a3, a5 +; RV64IA-NEXT: sc.w a5, a5, (a0) ; RV64IA-NEXT: bnez a5, .LBB48_1 ; RV64IA-NEXT: .LBB48_3: -; RV64IA-NEXT: and a2, a2, a4 -; RV64IA-NEXT: xor a1, a1, a2 +; RV64IA-NEXT: and a3, a3, a4 +; RV64IA-NEXT: xor a1, a1, a3 ; RV64IA-NEXT: seqz a0, a1 ; RV64IA-NEXT: ret ; ; RV64I-ZALRSC-LABEL: cmpxchg_i8_monotonic_monotonic_val1: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a3, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 +; RV64I-ZALRSC-NEXT: slli a3, a0, 3 ; RV64I-ZALRSC-NEXT: li a4, 255 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a4, a4, a3 ; RV64I-ZALRSC-NEXT: zext.b a1, a1 ; RV64I-ZALRSC-NEXT: zext.b a2, a2 -; RV64I-ZALRSC-NEXT: sllw a4, a4, a0 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 -; RV64I-ZALRSC-NEXT: sllw a0, a2, a0 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a3 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a3 ; RV64I-ZALRSC-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a2, (a3) -; RV64I-ZALRSC-NEXT: and a5, a2, a4 +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) +; RV64I-ZALRSC-NEXT: and a5, a3, a4 ; RV64I-ZALRSC-NEXT: bne a5, a1, .LBB48_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: 
Header=BB48_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a5, a2, a0 +; RV64I-ZALRSC-NEXT: xor a5, a3, a2 ; RV64I-ZALRSC-NEXT: and a5, a5, a4 -; RV64I-ZALRSC-NEXT: xor a5, a2, a5 -; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a3) +; RV64I-ZALRSC-NEXT: xor a5, a3, a5 +; RV64I-ZALRSC-NEXT: sc.w a5, a5, (a0) ; RV64I-ZALRSC-NEXT: bnez a5, .LBB48_1 ; RV64I-ZALRSC-NEXT: .LBB48_3: -; RV64I-ZALRSC-NEXT: and a2, a2, a4 -; RV64I-ZALRSC-NEXT: xor a1, a1, a2 +; RV64I-ZALRSC-NEXT: and a3, a3, a4 +; RV64I-ZALRSC-NEXT: xor a1, a1, a3 ; RV64I-ZALRSC-NEXT: seqz a0, a1 ; RV64I-ZALRSC-NEXT: ret %1 = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic @@ -5871,54 +5871,54 @@ define signext i16 @cmpxchg_i16_monotonic_monotonic_val0(ptr %ptr, i16 signext % ; ; RV32IA-LABEL: cmpxchg_i16_monotonic_monotonic_val0: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a5, a4, a0 -; RV32IA-NEXT: and a1, a1, a4 -; RV32IA-NEXT: and a2, a2, a4 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: sll a2, a2, a0 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: slli a4, a0, 3 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: sll a5, a3, a4 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a3 +; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: sll a2, a2, a4 ; RV32IA-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a3) -; RV32IA-NEXT: and a6, a4, a5 +; RV32IA-NEXT: lr.w a3, (a0) +; RV32IA-NEXT: and a6, a3, a5 ; RV32IA-NEXT: bne a6, a1, .LBB49_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 -; RV32IA-NEXT: xor a6, a4, a2 +; RV32IA-NEXT: xor a6, a3, a2 ; RV32IA-NEXT: and a6, a6, a5 -; RV32IA-NEXT: xor a6, a4, a6 -; RV32IA-NEXT: sc.w a6, a6, (a3) +; RV32IA-NEXT: xor a6, a3, a6 +; RV32IA-NEXT: sc.w a6, a6, (a0) ; RV32IA-NEXT: bnez a6, .LBB49_1 ; RV32IA-NEXT: .LBB49_3: -; RV32IA-NEXT: srl a0, a4, a0 +; RV32IA-NEXT: srl a0, a3, a4 ; RV32IA-NEXT: slli a0, 
a0, 16 ; RV32IA-NEXT: srai a0, a0, 16 ; RV32IA-NEXT: ret ; ; RV32I-ZALRSC-LABEL: cmpxchg_i16_monotonic_monotonic_val0: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a3, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a4, 16 -; RV32I-ZALRSC-NEXT: addi a4, a4, -1 -; RV32I-ZALRSC-NEXT: sll a5, a4, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a4 -; RV32I-ZALRSC-NEXT: and a2, a2, a4 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 -; RV32I-ZALRSC-NEXT: sll a2, a2, a0 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a4, a0, 3 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a5, a3, a4 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a2, a2, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a4 +; RV32I-ZALRSC-NEXT: sll a2, a2, a4 ; RV32I-ZALRSC-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a4, (a3) -; RV32I-ZALRSC-NEXT: and a6, a4, a5 +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) +; RV32I-ZALRSC-NEXT: and a6, a3, a5 ; RV32I-ZALRSC-NEXT: bne a6, a1, .LBB49_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a6, a4, a2 +; RV32I-ZALRSC-NEXT: xor a6, a3, a2 ; RV32I-ZALRSC-NEXT: and a6, a6, a5 -; RV32I-ZALRSC-NEXT: xor a6, a4, a6 -; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a3) +; RV32I-ZALRSC-NEXT: xor a6, a3, a6 +; RV32I-ZALRSC-NEXT: sc.w a6, a6, (a0) ; RV32I-ZALRSC-NEXT: bnez a6, .LBB49_1 ; RV32I-ZALRSC-NEXT: .LBB49_3: -; RV32I-ZALRSC-NEXT: srl a0, a4, a0 +; RV32I-ZALRSC-NEXT: srl a0, a3, a4 ; RV32I-ZALRSC-NEXT: slli a0, a0, 16 ; RV32I-ZALRSC-NEXT: srai a0, a0, 16 ; RV32I-ZALRSC-NEXT: ret @@ -5939,54 +5939,54 @@ define signext i16 @cmpxchg_i16_monotonic_monotonic_val0(ptr %ptr, i16 signext % ; ; RV64IA-LABEL: cmpxchg_i16_monotonic_monotonic_val0: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a4, 16 -; RV64IA-NEXT: addi a4, a4, -1 -; RV64IA-NEXT: sllw a5, a4, a0 -; RV64IA-NEXT: and a1, a1, a4 -; 
RV64IA-NEXT: and a2, a2, a4 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: sllw a2, a2, a0 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: slli a4, a0, 3 +; RV64IA-NEXT: addi a3, a3, -1 +; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: sllw a5, a3, a4 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: and a2, a2, a3 +; RV64IA-NEXT: sllw a1, a1, a4 +; RV64IA-NEXT: sllw a2, a2, a4 ; RV64IA-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a3) -; RV64IA-NEXT: and a6, a4, a5 +; RV64IA-NEXT: lr.w a3, (a0) +; RV64IA-NEXT: and a6, a3, a5 ; RV64IA-NEXT: bne a6, a1, .LBB49_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 -; RV64IA-NEXT: xor a6, a4, a2 +; RV64IA-NEXT: xor a6, a3, a2 ; RV64IA-NEXT: and a6, a6, a5 -; RV64IA-NEXT: xor a6, a4, a6 -; RV64IA-NEXT: sc.w a6, a6, (a3) +; RV64IA-NEXT: xor a6, a3, a6 +; RV64IA-NEXT: sc.w a6, a6, (a0) ; RV64IA-NEXT: bnez a6, .LBB49_1 ; RV64IA-NEXT: .LBB49_3: -; RV64IA-NEXT: srlw a0, a4, a0 +; RV64IA-NEXT: srlw a0, a3, a4 ; RV64IA-NEXT: slli a0, a0, 48 ; RV64IA-NEXT: srai a0, a0, 48 ; RV64IA-NEXT: ret ; ; RV64I-ZALRSC-LABEL: cmpxchg_i16_monotonic_monotonic_val0: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a3, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a4, 16 -; RV64I-ZALRSC-NEXT: addi a4, a4, -1 -; RV64I-ZALRSC-NEXT: sllw a5, a4, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a4 -; RV64I-ZALRSC-NEXT: and a2, a2, a4 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 -; RV64I-ZALRSC-NEXT: sllw a2, a2, a0 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a4, a0, 3 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a5, a3, a4 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a2, a2, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a4 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a4 ; RV64I-ZALRSC-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a4, (a3) -; RV64I-ZALRSC-NEXT: and a6, a4, a5 +; RV64I-ZALRSC-NEXT: lr.w a3, 
(a0) +; RV64I-ZALRSC-NEXT: and a6, a3, a5 ; RV64I-ZALRSC-NEXT: bne a6, a1, .LBB49_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a6, a4, a2 +; RV64I-ZALRSC-NEXT: xor a6, a3, a2 ; RV64I-ZALRSC-NEXT: and a6, a6, a5 -; RV64I-ZALRSC-NEXT: xor a6, a4, a6 -; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a3) +; RV64I-ZALRSC-NEXT: xor a6, a3, a6 +; RV64I-ZALRSC-NEXT: sc.w a6, a6, (a0) ; RV64I-ZALRSC-NEXT: bnez a6, .LBB49_1 ; RV64I-ZALRSC-NEXT: .LBB49_3: -; RV64I-ZALRSC-NEXT: srlw a0, a4, a0 +; RV64I-ZALRSC-NEXT: srlw a0, a3, a4 ; RV64I-ZALRSC-NEXT: slli a0, a0, 48 ; RV64I-ZALRSC-NEXT: srai a0, a0, 48 ; RV64I-ZALRSC-NEXT: ret @@ -6011,55 +6011,55 @@ define i1 @cmpxchg_i16_monotonic_monotonic_val1(ptr %ptr, i16 signext %cmp, i16 ; ; RV32IA-LABEL: cmpxchg_i16_monotonic_monotonic_val1: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a3, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: lui a4, 16 -; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a5, a4, a0 -; RV32IA-NEXT: and a1, a1, a4 -; RV32IA-NEXT: and a2, a2, a4 -; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: sll a0, a2, a0 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: slli a4, a0, 3 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: andi a0, a0, -4 +; RV32IA-NEXT: sll a5, a3, a4 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a3 +; RV32IA-NEXT: sll a1, a1, a4 +; RV32IA-NEXT: sll a2, a2, a4 ; RV32IA-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a2, (a3) -; RV32IA-NEXT: and a4, a2, a5 +; RV32IA-NEXT: lr.w a3, (a0) +; RV32IA-NEXT: and a4, a3, a5 ; RV32IA-NEXT: bne a4, a1, .LBB50_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1 -; RV32IA-NEXT: xor a4, a2, a0 +; RV32IA-NEXT: xor a4, a3, a2 ; RV32IA-NEXT: and a4, a4, a5 -; RV32IA-NEXT: xor a4, a2, a4 -; RV32IA-NEXT: sc.w a4, a4, (a3) +; RV32IA-NEXT: xor a4, a3, a4 +; RV32IA-NEXT: sc.w a4, a4, (a0) ; RV32IA-NEXT: bnez a4, .LBB50_1 ; RV32IA-NEXT: .LBB50_3: -; RV32IA-NEXT: and a2, a2, a5 -; RV32IA-NEXT: xor a1, 
a1, a2 +; RV32IA-NEXT: and a3, a3, a5 +; RV32IA-NEXT: xor a1, a1, a3 ; RV32IA-NEXT: seqz a0, a1 ; RV32IA-NEXT: ret ; ; RV32I-ZALRSC-LABEL: cmpxchg_i16_monotonic_monotonic_val1: ; RV32I-ZALRSC: # %bb.0: -; RV32I-ZALRSC-NEXT: andi a3, a0, -4 -; RV32I-ZALRSC-NEXT: slli a0, a0, 3 -; RV32I-ZALRSC-NEXT: lui a4, 16 -; RV32I-ZALRSC-NEXT: addi a4, a4, -1 -; RV32I-ZALRSC-NEXT: sll a5, a4, a0 -; RV32I-ZALRSC-NEXT: and a1, a1, a4 -; RV32I-ZALRSC-NEXT: and a2, a2, a4 -; RV32I-ZALRSC-NEXT: sll a1, a1, a0 -; RV32I-ZALRSC-NEXT: sll a0, a2, a0 +; RV32I-ZALRSC-NEXT: lui a3, 16 +; RV32I-ZALRSC-NEXT: slli a4, a0, 3 +; RV32I-ZALRSC-NEXT: addi a3, a3, -1 +; RV32I-ZALRSC-NEXT: andi a0, a0, -4 +; RV32I-ZALRSC-NEXT: sll a5, a3, a4 +; RV32I-ZALRSC-NEXT: and a1, a1, a3 +; RV32I-ZALRSC-NEXT: and a2, a2, a3 +; RV32I-ZALRSC-NEXT: sll a1, a1, a4 +; RV32I-ZALRSC-NEXT: sll a2, a2, a4 ; RV32I-ZALRSC-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 -; RV32I-ZALRSC-NEXT: lr.w a2, (a3) -; RV32I-ZALRSC-NEXT: and a4, a2, a5 +; RV32I-ZALRSC-NEXT: lr.w a3, (a0) +; RV32I-ZALRSC-NEXT: and a4, a3, a5 ; RV32I-ZALRSC-NEXT: bne a4, a1, .LBB50_3 ; RV32I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1 -; RV32I-ZALRSC-NEXT: xor a4, a2, a0 +; RV32I-ZALRSC-NEXT: xor a4, a3, a2 ; RV32I-ZALRSC-NEXT: and a4, a4, a5 -; RV32I-ZALRSC-NEXT: xor a4, a2, a4 -; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a3) +; RV32I-ZALRSC-NEXT: xor a4, a3, a4 +; RV32I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV32I-ZALRSC-NEXT: bnez a4, .LBB50_1 ; RV32I-ZALRSC-NEXT: .LBB50_3: -; RV32I-ZALRSC-NEXT: and a2, a2, a5 -; RV32I-ZALRSC-NEXT: xor a1, a1, a2 +; RV32I-ZALRSC-NEXT: and a3, a3, a5 +; RV32I-ZALRSC-NEXT: xor a1, a1, a3 ; RV32I-ZALRSC-NEXT: seqz a0, a1 ; RV32I-ZALRSC-NEXT: ret ; @@ -6078,55 +6078,55 @@ define i1 @cmpxchg_i16_monotonic_monotonic_val1(ptr %ptr, i16 signext %cmp, i16 ; ; RV64IA-LABEL: cmpxchg_i16_monotonic_monotonic_val1: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: lui a4, 16 -; 
RV64IA-NEXT: addi a4, a4, -1 -; RV64IA-NEXT: sllw a5, a4, a0 -; RV64IA-NEXT: and a1, a1, a4 -; RV64IA-NEXT: and a2, a2, a4 -; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: sllw a0, a2, a0 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: slli a4, a0, 3 +; RV64IA-NEXT: addi a3, a3, -1 +; RV64IA-NEXT: andi a0, a0, -4 +; RV64IA-NEXT: sllw a5, a3, a4 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: and a2, a2, a3 +; RV64IA-NEXT: sllw a1, a1, a4 +; RV64IA-NEXT: sllw a2, a2, a4 ; RV64IA-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a2, (a3) -; RV64IA-NEXT: and a4, a2, a5 +; RV64IA-NEXT: lr.w a3, (a0) +; RV64IA-NEXT: and a4, a3, a5 ; RV64IA-NEXT: bne a4, a1, .LBB50_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1 -; RV64IA-NEXT: xor a4, a2, a0 +; RV64IA-NEXT: xor a4, a3, a2 ; RV64IA-NEXT: and a4, a4, a5 -; RV64IA-NEXT: xor a4, a2, a4 -; RV64IA-NEXT: sc.w a4, a4, (a3) +; RV64IA-NEXT: xor a4, a3, a4 +; RV64IA-NEXT: sc.w a4, a4, (a0) ; RV64IA-NEXT: bnez a4, .LBB50_1 ; RV64IA-NEXT: .LBB50_3: -; RV64IA-NEXT: and a2, a2, a5 -; RV64IA-NEXT: xor a1, a1, a2 +; RV64IA-NEXT: and a3, a3, a5 +; RV64IA-NEXT: xor a1, a1, a3 ; RV64IA-NEXT: seqz a0, a1 ; RV64IA-NEXT: ret ; ; RV64I-ZALRSC-LABEL: cmpxchg_i16_monotonic_monotonic_val1: ; RV64I-ZALRSC: # %bb.0: -; RV64I-ZALRSC-NEXT: andi a3, a0, -4 -; RV64I-ZALRSC-NEXT: slli a0, a0, 3 -; RV64I-ZALRSC-NEXT: lui a4, 16 -; RV64I-ZALRSC-NEXT: addi a4, a4, -1 -; RV64I-ZALRSC-NEXT: sllw a5, a4, a0 -; RV64I-ZALRSC-NEXT: and a1, a1, a4 -; RV64I-ZALRSC-NEXT: and a2, a2, a4 -; RV64I-ZALRSC-NEXT: sllw a1, a1, a0 -; RV64I-ZALRSC-NEXT: sllw a0, a2, a0 +; RV64I-ZALRSC-NEXT: lui a3, 16 +; RV64I-ZALRSC-NEXT: slli a4, a0, 3 +; RV64I-ZALRSC-NEXT: addi a3, a3, -1 +; RV64I-ZALRSC-NEXT: andi a0, a0, -4 +; RV64I-ZALRSC-NEXT: sllw a5, a3, a4 +; RV64I-ZALRSC-NEXT: and a1, a1, a3 +; RV64I-ZALRSC-NEXT: and a2, a2, a3 +; RV64I-ZALRSC-NEXT: sllw a1, a1, a4 +; RV64I-ZALRSC-NEXT: sllw a2, a2, a4 ; RV64I-ZALRSC-NEXT: .LBB50_1: # =>This Inner Loop 
Header: Depth=1 -; RV64I-ZALRSC-NEXT: lr.w a2, (a3) -; RV64I-ZALRSC-NEXT: and a4, a2, a5 +; RV64I-ZALRSC-NEXT: lr.w a3, (a0) +; RV64I-ZALRSC-NEXT: and a4, a3, a5 ; RV64I-ZALRSC-NEXT: bne a4, a1, .LBB50_3 ; RV64I-ZALRSC-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1 -; RV64I-ZALRSC-NEXT: xor a4, a2, a0 +; RV64I-ZALRSC-NEXT: xor a4, a3, a2 ; RV64I-ZALRSC-NEXT: and a4, a4, a5 -; RV64I-ZALRSC-NEXT: xor a4, a2, a4 -; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a3) +; RV64I-ZALRSC-NEXT: xor a4, a3, a4 +; RV64I-ZALRSC-NEXT: sc.w a4, a4, (a0) ; RV64I-ZALRSC-NEXT: bnez a4, .LBB50_1 ; RV64I-ZALRSC-NEXT: .LBB50_3: -; RV64I-ZALRSC-NEXT: and a2, a2, a5 -; RV64I-ZALRSC-NEXT: xor a1, a1, a2 +; RV64I-ZALRSC-NEXT: and a3, a3, a5 +; RV64I-ZALRSC-NEXT: xor a1, a1, a3 ; RV64I-ZALRSC-NEXT: seqz a0, a1 ; RV64I-ZALRSC-NEXT: ret %1 = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll index ea9786d0b10b3..59905534e2012 100644 --- a/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll +++ b/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll @@ -62,10 +62,10 @@ define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) { ; RV32IA-LABEL: atomicrmw_usub_cond_i8: ; RV32IA: # %bb.0: ; RV32IA-NEXT: andi a2, a0, -4 +; RV32IA-NEXT: lw a5, 0(a2) ; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: li a4, 255 ; RV32IA-NEXT: andi a0, a3, 24 -; RV32IA-NEXT: lw a5, 0(a2) ; RV32IA-NEXT: sll a3, a4, a3 ; RV32IA-NEXT: not a3, a3 ; RV32IA-NEXT: zext.b a4, a1 @@ -145,10 +145,10 @@ define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) { ; RV64IA-LABEL: atomicrmw_usub_cond_i8: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a2, a0, -4 +; RV64IA-NEXT: lw a3, 0(a2) ; RV64IA-NEXT: slli a4, a0, 3 ; RV64IA-NEXT: li a5, 255 ; RV64IA-NEXT: andi a0, a4, 24 -; RV64IA-NEXT: lw a3, 0(a2) ; RV64IA-NEXT: sllw a4, a5, a4 ; RV64IA-NEXT: not a4, a4 ; RV64IA-NEXT: zext.b a5, a1 @@ -156,25 +156,25 @@ define i8 @atomicrmw_usub_cond_i8(ptr 
%ptr, i8 %val) { ; RV64IA-NEXT: # =>This Loop Header: Depth=1 ; RV64IA-NEXT: # Child Loop BB0_3 Depth 2 ; RV64IA-NEXT: srlw a6, a3, a0 -; RV64IA-NEXT: sext.w a7, a3 -; RV64IA-NEXT: zext.b t0, a6 -; RV64IA-NEXT: sltu t0, t0, a5 -; RV64IA-NEXT: addi t0, t0, -1 -; RV64IA-NEXT: and t0, t0, a1 -; RV64IA-NEXT: sub a6, a6, t0 +; RV64IA-NEXT: zext.b a7, a6 +; RV64IA-NEXT: sltu a7, a7, a5 +; RV64IA-NEXT: addi a7, a7, -1 +; RV64IA-NEXT: and a7, a7, a1 +; RV64IA-NEXT: sub a6, a6, a7 ; RV64IA-NEXT: zext.b a6, a6 ; RV64IA-NEXT: sllw a6, a6, a0 -; RV64IA-NEXT: and a3, a3, a4 -; RV64IA-NEXT: or a6, a3, a6 +; RV64IA-NEXT: and a7, a3, a4 +; RV64IA-NEXT: sext.w t0, a3 +; RV64IA-NEXT: or a6, a7, a6 ; RV64IA-NEXT: .LBB0_3: # %atomicrmw.start ; RV64IA-NEXT: # Parent Loop BB0_1 Depth=1 ; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 ; RV64IA-NEXT: lr.w.aqrl a3, (a2) -; RV64IA-NEXT: bne a3, a7, .LBB0_1 +; RV64IA-NEXT: bne a3, t0, .LBB0_1 ; RV64IA-NEXT: # %bb.4: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: Header=BB0_3 Depth=2 -; RV64IA-NEXT: sc.w.rl t0, a6, (a2) -; RV64IA-NEXT: bnez t0, .LBB0_3 +; RV64IA-NEXT: sc.w.rl a7, a6, (a2) +; RV64IA-NEXT: bnez a7, .LBB0_3 ; RV64IA-NEXT: # %bb.5: # %atomicrmw.start ; RV64IA-NEXT: # %bb.2: # %atomicrmw.end ; RV64IA-NEXT: srlw a0, a3, a0 @@ -198,29 +198,29 @@ define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) { ; RV32I-NEXT: .cfi_offset s1, -12 ; RV32I-NEXT: .cfi_offset s2, -16 ; RV32I-NEXT: .cfi_offset s3, -20 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lhu a3, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: .LBB1_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 +; RV32I-NEXT: and a0, a3, s2 ; RV32I-NEXT: sltu a0, a0, s3 ; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: and a0, a0, s0 -; RV32I-NEXT: sub a2, 
a1, a0 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: sub a2, a3, a0 +; RV32I-NEXT: sh a3, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: lh a3, 10(sp) ; RV32I-NEXT: beqz a0, .LBB1_1 ; RV32I-NEXT: # %bb.2: # %atomicrmw.end -; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -238,11 +238,11 @@ define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) { ; RV32IA-LABEL: atomicrmw_usub_cond_i16: ; RV32IA: # %bb.0: ; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a4, a0, 3 ; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: andi a0, a4, 24 -; RV32IA-NEXT: addi a3, a3, -1 ; RV32IA-NEXT: lw a6, 0(a2) +; RV32IA-NEXT: slli a4, a0, 3 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: andi a0, a4, 24 ; RV32IA-NEXT: sll a4, a3, a4 ; RV32IA-NEXT: not a4, a4 ; RV32IA-NEXT: and a5, a1, a3 @@ -288,29 +288,29 @@ define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) { ; RV64I-NEXT: .cfi_offset s1, -24 ; RV64I-NEXT: .cfi_offset s2, -32 ; RV64I-NEXT: .cfi_offset s3, -40 -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lhu a3, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addi s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: .LBB1_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 +; RV64I-NEXT: and a0, a3, s2 ; RV64I-NEXT: sltu a0, a0, s3 ; RV64I-NEXT: addi a0, a0, -1 -; RV64I-NEXT: and a0, a0, s0 -; RV64I-NEXT: sub a2, a1, a0 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: sub a2, a3, a0 +; RV64I-NEXT: sh a3, 6(sp) ; RV64I-NEXT: addi a1, 
sp, 6 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: lh a3, 6(sp) ; RV64I-NEXT: beqz a0, .LBB1_1 ; RV64I-NEXT: # %bb.2: # %atomicrmw.end -; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -328,40 +328,40 @@ define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) { ; RV64IA-LABEL: atomicrmw_usub_cond_i16: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a2, a0, -4 +; RV64IA-NEXT: lui a4, 16 +; RV64IA-NEXT: lw a3, 0(a2) ; RV64IA-NEXT: slli a5, a0, 3 -; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addi a4, a4, -1 ; RV64IA-NEXT: andi a0, a5, 24 -; RV64IA-NEXT: addi a3, a3, -1 -; RV64IA-NEXT: lw a4, 0(a2) -; RV64IA-NEXT: sllw a5, a3, a5 +; RV64IA-NEXT: sllw a5, a4, a5 ; RV64IA-NEXT: not a5, a5 -; RV64IA-NEXT: and a6, a1, a3 +; RV64IA-NEXT: and a6, a1, a4 ; RV64IA-NEXT: .LBB1_1: # %atomicrmw.start ; RV64IA-NEXT: # =>This Loop Header: Depth=1 ; RV64IA-NEXT: # Child Loop BB1_3 Depth 2 -; RV64IA-NEXT: srlw a7, a4, a0 -; RV64IA-NEXT: sext.w t0, a4 -; RV64IA-NEXT: and t1, a7, a3 -; RV64IA-NEXT: sltu t1, t1, a6 -; RV64IA-NEXT: addi t1, t1, -1 -; RV64IA-NEXT: and t1, t1, a1 -; RV64IA-NEXT: sub a7, a7, t1 -; RV64IA-NEXT: and a7, a7, a3 +; RV64IA-NEXT: srlw a7, a3, a0 +; RV64IA-NEXT: and t0, a7, a4 +; RV64IA-NEXT: sltu t0, t0, a6 +; RV64IA-NEXT: addi t0, t0, -1 +; RV64IA-NEXT: and t0, t0, a1 +; RV64IA-NEXT: sub a7, a7, t0 +; RV64IA-NEXT: and a7, a7, a4 ; RV64IA-NEXT: sllw a7, a7, a0 -; RV64IA-NEXT: and a4, a4, a5 -; RV64IA-NEXT: or a7, a4, a7 +; RV64IA-NEXT: and t0, a3, a5 +; RV64IA-NEXT: sext.w t1, a3 +; RV64IA-NEXT: or a7, t0, a7 ; RV64IA-NEXT: .LBB1_3: # %atomicrmw.start ; RV64IA-NEXT: # Parent Loop BB1_1 Depth=1 ; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 -; RV64IA-NEXT: lr.w.aqrl a4, (a2) -; 
RV64IA-NEXT: bne a4, t0, .LBB1_1 +; RV64IA-NEXT: lr.w.aqrl a3, (a2) +; RV64IA-NEXT: bne a3, t1, .LBB1_1 ; RV64IA-NEXT: # %bb.4: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: Header=BB1_3 Depth=2 -; RV64IA-NEXT: sc.w.rl t1, a7, (a2) -; RV64IA-NEXT: bnez t1, .LBB1_3 +; RV64IA-NEXT: sc.w.rl t0, a7, (a2) +; RV64IA-NEXT: bnez t0, .LBB1_3 ; RV64IA-NEXT: # %bb.5: # %atomicrmw.start ; RV64IA-NEXT: # %bb.2: # %atomicrmw.end -; RV64IA-NEXT: srlw a0, a4, a0 +; RV64IA-NEXT: srlw a0, a3, a0 ; RV64IA-NEXT: ret %result = atomicrmw usub_cond ptr %ptr, i16 %val seq_cst ret i16 %result @@ -741,10 +741,10 @@ define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) { ; RV32IA-LABEL: atomicrmw_usub_sat_i8: ; RV32IA: # %bb.0: ; RV32IA-NEXT: andi a2, a0, -4 +; RV32IA-NEXT: lw a4, 0(a2) ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 -; RV32IA-NEXT: lw a4, 0(a2) ; RV32IA-NEXT: andi a0, a0, 24 ; RV32IA-NEXT: not a3, a3 ; RV32IA-NEXT: zext.b a1, a1 @@ -818,10 +818,10 @@ define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) { ; RV64IA-LABEL: atomicrmw_usub_sat_i8: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: sllw a4, a3, a0 ; RV64IA-NEXT: lw a3, 0(a2) +; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: li a4, 255 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: andi a0, a0, 24 ; RV64IA-NEXT: not a4, a4 ; RV64IA-NEXT: zext.b a1, a1 @@ -829,24 +829,24 @@ define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) { ; RV64IA-NEXT: # =>This Loop Header: Depth=1 ; RV64IA-NEXT: # Child Loop BB4_3 Depth 2 ; RV64IA-NEXT: srlw a5, a3, a0 -; RV64IA-NEXT: sext.w a6, a3 ; RV64IA-NEXT: zext.b a5, a5 -; RV64IA-NEXT: sub a7, a5, a1 -; RV64IA-NEXT: sltu a5, a5, a7 +; RV64IA-NEXT: sub a6, a5, a1 +; RV64IA-NEXT: sltu a5, a5, a6 ; RV64IA-NEXT: addi a5, a5, -1 -; RV64IA-NEXT: and a5, a5, a7 +; RV64IA-NEXT: and a5, a5, a6 ; RV64IA-NEXT: sllw a5, a5, a0 -; RV64IA-NEXT: and a3, a3, a4 -; RV64IA-NEXT: or a5, a3, a5 +; 
RV64IA-NEXT: and a6, a3, a4 +; RV64IA-NEXT: sext.w a7, a3 +; RV64IA-NEXT: or a5, a6, a5 ; RV64IA-NEXT: .LBB4_3: # %atomicrmw.start ; RV64IA-NEXT: # Parent Loop BB4_1 Depth=1 ; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 ; RV64IA-NEXT: lr.w.aqrl a3, (a2) -; RV64IA-NEXT: bne a3, a6, .LBB4_1 +; RV64IA-NEXT: bne a3, a7, .LBB4_1 ; RV64IA-NEXT: # %bb.4: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: Header=BB4_3 Depth=2 -; RV64IA-NEXT: sc.w.rl a7, a5, (a2) -; RV64IA-NEXT: bnez a7, .LBB4_3 +; RV64IA-NEXT: sc.w.rl a6, a5, (a2) +; RV64IA-NEXT: bnez a6, .LBB4_3 ; RV64IA-NEXT: # %bb.5: # %atomicrmw.start ; RV64IA-NEXT: # %bb.2: # %atomicrmw.end ; RV64IA-NEXT: srlw a0, a3, a0 @@ -905,11 +905,11 @@ define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) { ; RV32IA-LABEL: atomicrmw_usub_sat_i16: ; RV32IA: # %bb.0: ; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a4, a0, 3 ; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: andi a0, a4, 24 -; RV32IA-NEXT: addi a3, a3, -1 ; RV32IA-NEXT: lw a5, 0(a2) +; RV32IA-NEXT: slli a4, a0, 3 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: andi a0, a4, 24 ; RV32IA-NEXT: sll a4, a3, a4 ; RV32IA-NEXT: not a4, a4 ; RV32IA-NEXT: and a1, a1, a3 @@ -989,39 +989,39 @@ define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) { ; RV64IA-LABEL: atomicrmw_usub_sat_i16: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a2, a0, -4 +; RV64IA-NEXT: lui a4, 16 +; RV64IA-NEXT: lw a3, 0(a2) ; RV64IA-NEXT: slli a5, a0, 3 -; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addi a4, a4, -1 ; RV64IA-NEXT: andi a0, a5, 24 -; RV64IA-NEXT: addi a3, a3, -1 -; RV64IA-NEXT: lw a4, 0(a2) -; RV64IA-NEXT: sllw a5, a3, a5 +; RV64IA-NEXT: sllw a5, a4, a5 ; RV64IA-NEXT: not a5, a5 -; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: and a1, a1, a4 ; RV64IA-NEXT: .LBB5_1: # %atomicrmw.start ; RV64IA-NEXT: # =>This Loop Header: Depth=1 ; RV64IA-NEXT: # Child Loop BB5_3 Depth 2 -; RV64IA-NEXT: srlw a6, a4, a0 -; RV64IA-NEXT: sext.w a7, a4 -; RV64IA-NEXT: and a6, a6, a3 -; RV64IA-NEXT: sub t0, a6, a1 -; 
RV64IA-NEXT: sltu a6, a6, t0 +; RV64IA-NEXT: srlw a6, a3, a0 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: sub a7, a6, a1 +; RV64IA-NEXT: sltu a6, a6, a7 ; RV64IA-NEXT: addi a6, a6, -1 -; RV64IA-NEXT: and a6, a6, t0 +; RV64IA-NEXT: and a6, a6, a7 ; RV64IA-NEXT: sllw a6, a6, a0 -; RV64IA-NEXT: and a4, a4, a5 -; RV64IA-NEXT: or a6, a4, a6 +; RV64IA-NEXT: and a7, a3, a5 +; RV64IA-NEXT: sext.w t0, a3 +; RV64IA-NEXT: or a6, a7, a6 ; RV64IA-NEXT: .LBB5_3: # %atomicrmw.start ; RV64IA-NEXT: # Parent Loop BB5_1 Depth=1 ; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 -; RV64IA-NEXT: lr.w.aqrl a4, (a2) -; RV64IA-NEXT: bne a4, a7, .LBB5_1 +; RV64IA-NEXT: lr.w.aqrl a3, (a2) +; RV64IA-NEXT: bne a3, t0, .LBB5_1 ; RV64IA-NEXT: # %bb.4: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: Header=BB5_3 Depth=2 -; RV64IA-NEXT: sc.w.rl t0, a6, (a2) -; RV64IA-NEXT: bnez t0, .LBB5_3 +; RV64IA-NEXT: sc.w.rl a7, a6, (a2) +; RV64IA-NEXT: bnez a7, .LBB5_3 ; RV64IA-NEXT: # %bb.5: # %atomicrmw.start ; RV64IA-NEXT: # %bb.2: # %atomicrmw.end -; RV64IA-NEXT: srlw a0, a4, a0 +; RV64IA-NEXT: srlw a0, a3, a0 ; RV64IA-NEXT: ret %result = atomicrmw usub_sat ptr %ptr, i16 %val seq_cst ret i16 %result diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll index 4e04f38a6301d..7ed259532f883 100644 --- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll @@ -29,11 +29,11 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; RV32I-NEXT: zext.b s1, a1 ; RV32I-NEXT: .LBB0_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: addi a0, a3, 1 -; RV32I-NEXT: zext.b a1, a3 -; RV32I-NEXT: sltu a1, a1, s1 -; RV32I-NEXT: neg a2, a1 -; RV32I-NEXT: and a2, a2, a0 +; RV32I-NEXT: zext.b a0, a3 +; RV32I-NEXT: sltu a0, a0, s1 +; RV32I-NEXT: addi a1, a3, 1 +; RV32I-NEXT: neg a2, a0 +; RV32I-NEXT: and a2, a2, a1 ; RV32I-NEXT: sb a3, 3(sp) ; RV32I-NEXT: addi a1, sp, 3 
; RV32I-NEXT: li a3, 5 @@ -57,10 +57,10 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; RV32IA-LABEL: atomicrmw_uinc_wrap_i8: ; RV32IA: # %bb.0: ; RV32IA-NEXT: andi a2, a0, -4 +; RV32IA-NEXT: lw a4, 0(a2) ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 -; RV32IA-NEXT: lw a4, 0(a2) ; RV32IA-NEXT: andi a0, a0, 24 ; RV32IA-NEXT: not a3, a3 ; RV32IA-NEXT: zext.b a1, a1 @@ -70,8 +70,8 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; RV32IA-NEXT: mv a5, a4 ; RV32IA-NEXT: srl a4, a4, a0 ; RV32IA-NEXT: zext.b a6, a4 -; RV32IA-NEXT: addi a4, a4, 1 ; RV32IA-NEXT: sltu a6, a6, a1 +; RV32IA-NEXT: addi a4, a4, 1 ; RV32IA-NEXT: neg a6, a6 ; RV32IA-NEXT: and a4, a6, a4 ; RV32IA-NEXT: zext.b a4, a4 @@ -107,11 +107,11 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; RV64I-NEXT: zext.b s1, a1 ; RV64I-NEXT: .LBB0_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: addi a0, a3, 1 -; RV64I-NEXT: zext.b a1, a3 -; RV64I-NEXT: sltu a1, a1, s1 -; RV64I-NEXT: neg a2, a1 -; RV64I-NEXT: and a2, a2, a0 +; RV64I-NEXT: zext.b a0, a3 +; RV64I-NEXT: sltu a0, a0, s1 +; RV64I-NEXT: addi a1, a3, 1 +; RV64I-NEXT: neg a2, a0 +; RV64I-NEXT: and a2, a2, a1 ; RV64I-NEXT: sb a3, 7(sp) ; RV64I-NEXT: addi a1, sp, 7 ; RV64I-NEXT: li a3, 5 @@ -135,10 +135,10 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; RV64IA-LABEL: atomicrmw_uinc_wrap_i8: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: sllw a4, a3, a0 ; RV64IA-NEXT: lw a3, 0(a2) +; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: li a4, 255 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: andi a0, a0, 24 ; RV64IA-NEXT: not a4, a4 ; RV64IA-NEXT: zext.b a1, a1 @@ -146,25 +146,25 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; RV64IA-NEXT: # =>This Loop Header: Depth=1 ; RV64IA-NEXT: # Child Loop BB0_3 Depth 2 ; RV64IA-NEXT: srlw a5, a3, a0 -; RV64IA-NEXT: sext.w 
a6, a3 -; RV64IA-NEXT: zext.b a7, a5 +; RV64IA-NEXT: zext.b a6, a5 +; RV64IA-NEXT: sltu a6, a6, a1 ; RV64IA-NEXT: addi a5, a5, 1 -; RV64IA-NEXT: sltu a7, a7, a1 -; RV64IA-NEXT: neg a7, a7 -; RV64IA-NEXT: and a5, a7, a5 +; RV64IA-NEXT: neg a6, a6 +; RV64IA-NEXT: and a5, a6, a5 ; RV64IA-NEXT: zext.b a5, a5 ; RV64IA-NEXT: sllw a5, a5, a0 -; RV64IA-NEXT: and a3, a3, a4 -; RV64IA-NEXT: or a5, a3, a5 +; RV64IA-NEXT: and a6, a3, a4 +; RV64IA-NEXT: sext.w a7, a3 +; RV64IA-NEXT: or a5, a6, a5 ; RV64IA-NEXT: .LBB0_3: # %atomicrmw.start ; RV64IA-NEXT: # Parent Loop BB0_1 Depth=1 ; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 ; RV64IA-NEXT: lr.w.aqrl a3, (a2) -; RV64IA-NEXT: bne a3, a6, .LBB0_1 +; RV64IA-NEXT: bne a3, a7, .LBB0_1 ; RV64IA-NEXT: # %bb.4: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: Header=BB0_3 Depth=2 -; RV64IA-NEXT: sc.w.rl a7, a5, (a2) -; RV64IA-NEXT: bnez a7, .LBB0_3 +; RV64IA-NEXT: sc.w.rl a6, a5, (a2) +; RV64IA-NEXT: bnez a6, .LBB0_3 ; RV64IA-NEXT: # %bb.5: # %atomicrmw.start ; RV64IA-NEXT: # %bb.2: # %atomicrmw.end ; RV64IA-NEXT: srlw a0, a3, a0 @@ -194,8 +194,8 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { ; RV32I-NEXT: .LBB1_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: and a0, a3, s1 -; RV32I-NEXT: addi a1, a3, 1 ; RV32I-NEXT: sltu a0, a0, s2 +; RV32I-NEXT: addi a1, a3, 1 ; RV32I-NEXT: neg a2, a0 ; RV32I-NEXT: and a2, a2, a1 ; RV32I-NEXT: sh a3, 14(sp) @@ -223,11 +223,11 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { ; RV32IA-LABEL: atomicrmw_uinc_wrap_i16: ; RV32IA: # %bb.0: ; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a4, a0, 3 ; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: andi a0, a4, 24 -; RV32IA-NEXT: addi a3, a3, -1 ; RV32IA-NEXT: lw a5, 0(a2) +; RV32IA-NEXT: slli a4, a0, 3 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: andi a0, a4, 24 ; RV32IA-NEXT: sll a4, a3, a4 ; RV32IA-NEXT: not a4, a4 ; RV32IA-NEXT: and a1, a1, a3 @@ -239,8 +239,8 @@ define i16 
@atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { ; RV32IA-NEXT: and a7, a5, a3 ; RV32IA-NEXT: addi a5, a5, 1 ; RV32IA-NEXT: sltu a7, a7, a1 -; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: neg a7, a7 +; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: and a5, a7, a5 ; RV32IA-NEXT: sll a5, a5, a0 ; RV32IA-NEXT: and a7, a6, a4 @@ -279,8 +279,8 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { ; RV64I-NEXT: .LBB1_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: and a0, a3, s1 -; RV64I-NEXT: addi a1, a3, 1 ; RV64I-NEXT: sltu a0, a0, s2 +; RV64I-NEXT: addi a1, a3, 1 ; RV64I-NEXT: neg a2, a0 ; RV64I-NEXT: and a2, a2, a1 ; RV64I-NEXT: sh a3, 14(sp) @@ -308,40 +308,40 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { ; RV64IA-LABEL: atomicrmw_uinc_wrap_i16: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a2, a0, -4 +; RV64IA-NEXT: lui a4, 16 +; RV64IA-NEXT: lw a3, 0(a2) ; RV64IA-NEXT: slli a5, a0, 3 -; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addi a4, a4, -1 ; RV64IA-NEXT: andi a0, a5, 24 -; RV64IA-NEXT: addi a3, a3, -1 -; RV64IA-NEXT: lw a4, 0(a2) -; RV64IA-NEXT: sllw a5, a3, a5 +; RV64IA-NEXT: sllw a5, a4, a5 ; RV64IA-NEXT: not a5, a5 -; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: and a1, a1, a4 ; RV64IA-NEXT: .LBB1_1: # %atomicrmw.start ; RV64IA-NEXT: # =>This Loop Header: Depth=1 ; RV64IA-NEXT: # Child Loop BB1_3 Depth 2 -; RV64IA-NEXT: srlw a6, a4, a0 -; RV64IA-NEXT: sext.w a7, a4 -; RV64IA-NEXT: and t0, a6, a3 +; RV64IA-NEXT: srlw a6, a3, a0 +; RV64IA-NEXT: and a7, a6, a4 ; RV64IA-NEXT: addi a6, a6, 1 -; RV64IA-NEXT: sltu t0, t0, a1 -; RV64IA-NEXT: and a6, a6, a3 -; RV64IA-NEXT: neg t0, t0 -; RV64IA-NEXT: and a6, t0, a6 +; RV64IA-NEXT: sltu a7, a7, a1 +; RV64IA-NEXT: neg a7, a7 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: and a6, a7, a6 ; RV64IA-NEXT: sllw a6, a6, a0 -; RV64IA-NEXT: and a4, a4, a5 -; RV64IA-NEXT: or a6, a4, a6 +; RV64IA-NEXT: and a7, a3, a5 +; RV64IA-NEXT: sext.w t0, a3 +; RV64IA-NEXT: or a6, a7, a6 ; 
RV64IA-NEXT: .LBB1_3: # %atomicrmw.start ; RV64IA-NEXT: # Parent Loop BB1_1 Depth=1 ; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 -; RV64IA-NEXT: lr.w.aqrl a4, (a2) -; RV64IA-NEXT: bne a4, a7, .LBB1_1 +; RV64IA-NEXT: lr.w.aqrl a3, (a2) +; RV64IA-NEXT: bne a3, t0, .LBB1_1 ; RV64IA-NEXT: # %bb.4: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: Header=BB1_3 Depth=2 -; RV64IA-NEXT: sc.w.rl t0, a6, (a2) -; RV64IA-NEXT: bnez t0, .LBB1_3 +; RV64IA-NEXT: sc.w.rl a7, a6, (a2) +; RV64IA-NEXT: bnez a7, .LBB1_3 ; RV64IA-NEXT: # %bb.5: # %atomicrmw.start ; RV64IA-NEXT: # %bb.2: # %atomicrmw.end -; RV64IA-NEXT: srlw a0, a4, a0 +; RV64IA-NEXT: srlw a0, a3, a0 ; RV64IA-NEXT: ret %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst ret i16 %result @@ -363,10 +363,10 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { ; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: .LBB2_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: addi a0, a3, 1 -; RV32I-NEXT: sltu a1, a3, s1 -; RV32I-NEXT: neg a2, a1 -; RV32I-NEXT: and a2, a2, a0 +; RV32I-NEXT: sltu a0, a3, s1 +; RV32I-NEXT: addi a1, a3, 1 +; RV32I-NEXT: neg a2, a0 +; RV32I-NEXT: and a2, a2, a1 ; RV32I-NEXT: sw a3, 0(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a3, 5 @@ -394,10 +394,10 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { ; RV32IA-NEXT: # =>This Loop Header: Depth=1 ; RV32IA-NEXT: # Child Loop BB2_3 Depth 2 ; RV32IA-NEXT: mv a3, a2 -; RV32IA-NEXT: addi a2, a2, 1 -; RV32IA-NEXT: sltu a4, a3, a1 -; RV32IA-NEXT: neg a4, a4 -; RV32IA-NEXT: and a4, a4, a2 +; RV32IA-NEXT: sltu a2, a2, a1 +; RV32IA-NEXT: addi a4, a3, 1 +; RV32IA-NEXT: neg a2, a2 +; RV32IA-NEXT: and a4, a2, a4 ; RV32IA-NEXT: .LBB2_3: # %atomicrmw.start ; RV32IA-NEXT: # Parent Loop BB2_1 Depth=1 ; RV32IA-NEXT: # => This Inner Loop Header: Depth=2 @@ -427,10 +427,10 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { ; RV64I-NEXT: sext.w s1, a1 ; RV64I-NEXT: .LBB2_1: # %atomicrmw.start ; RV64I-NEXT: # =>This 
Inner Loop Header: Depth=1 -; RV64I-NEXT: addiw a0, a3, 1 -; RV64I-NEXT: sltu a1, a3, s1 -; RV64I-NEXT: neg a2, a1 -; RV64I-NEXT: and a2, a2, a0 +; RV64I-NEXT: sltu a0, a3, s1 +; RV64I-NEXT: addiw a1, a3, 1 +; RV64I-NEXT: neg a2, a0 +; RV64I-NEXT: and a2, a2, a1 ; RV64I-NEXT: sw a3, 4(sp) ; RV64I-NEXT: addi a1, sp, 4 ; RV64I-NEXT: li a3, 5 @@ -458,19 +458,19 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { ; RV64IA-NEXT: .LBB2_1: # %atomicrmw.start ; RV64IA-NEXT: # =>This Loop Header: Depth=1 ; RV64IA-NEXT: # Child Loop BB2_3 Depth 2 -; RV64IA-NEXT: addiw a3, a2, 1 -; RV64IA-NEXT: sext.w a4, a2 -; RV64IA-NEXT: sltu a2, a4, a1 -; RV64IA-NEXT: neg a2, a2 -; RV64IA-NEXT: and a3, a2, a3 +; RV64IA-NEXT: sext.w a3, a2 +; RV64IA-NEXT: sltu a4, a3, a1 +; RV64IA-NEXT: addiw a2, a2, 1 +; RV64IA-NEXT: neg a4, a4 +; RV64IA-NEXT: and a4, a4, a2 ; RV64IA-NEXT: .LBB2_3: # %atomicrmw.start ; RV64IA-NEXT: # Parent Loop BB2_1 Depth=1 ; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 ; RV64IA-NEXT: lr.w.aqrl a2, (a0) -; RV64IA-NEXT: bne a2, a4, .LBB2_1 +; RV64IA-NEXT: bne a2, a3, .LBB2_1 ; RV64IA-NEXT: # %bb.4: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: Header=BB2_3 Depth=2 -; RV64IA-NEXT: sc.w.rl a5, a3, (a0) +; RV64IA-NEXT: sc.w.rl a5, a4, (a0) ; RV64IA-NEXT: bnez a5, .LBB2_3 ; RV64IA-NEXT: # %bb.5: # %atomicrmw.start ; RV64IA-NEXT: # %bb.2: # %atomicrmw.end @@ -505,10 +505,10 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { ; RV32I-NEXT: .LBB3_2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1 ; RV32I-NEXT: addi a1, a4, 1 +; RV32I-NEXT: seqz a2, a1 ; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: seqz a3, a1 +; RV32I-NEXT: add a3, a5, a2 ; RV32I-NEXT: and a2, a0, a1 -; RV32I-NEXT: add a3, a5, a3 ; RV32I-NEXT: and a3, a0, a3 ; RV32I-NEXT: sw a4, 8(sp) ; RV32I-NEXT: sw a5, 12(sp) @@ -565,10 +565,10 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { ; RV32IA-NEXT: .LBB3_2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1 
; RV32IA-NEXT: addi a1, a4, 1 +; RV32IA-NEXT: seqz a2, a1 ; RV32IA-NEXT: neg a0, a0 -; RV32IA-NEXT: seqz a3, a1 +; RV32IA-NEXT: add a3, a5, a2 ; RV32IA-NEXT: and a2, a0, a1 -; RV32IA-NEXT: add a3, a5, a3 ; RV32IA-NEXT: and a3, a0, a3 ; RV32IA-NEXT: sw a4, 8(sp) ; RV32IA-NEXT: sw a5, 12(sp) @@ -616,10 +616,10 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { ; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: .LBB3_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: addi a0, a3, 1 -; RV64I-NEXT: sltu a1, a3, s1 -; RV64I-NEXT: neg a2, a1 -; RV64I-NEXT: and a2, a2, a0 +; RV64I-NEXT: sltu a0, a3, s1 +; RV64I-NEXT: addi a1, a3, 1 +; RV64I-NEXT: neg a2, a0 +; RV64I-NEXT: and a2, a2, a1 ; RV64I-NEXT: sd a3, 0(sp) ; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: li a3, 5 @@ -647,10 +647,10 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { ; RV64IA-NEXT: # =>This Loop Header: Depth=1 ; RV64IA-NEXT: # Child Loop BB3_3 Depth 2 ; RV64IA-NEXT: mv a3, a2 -; RV64IA-NEXT: addi a2, a2, 1 -; RV64IA-NEXT: sltu a4, a3, a1 -; RV64IA-NEXT: neg a4, a4 -; RV64IA-NEXT: and a4, a4, a2 +; RV64IA-NEXT: sltu a2, a2, a1 +; RV64IA-NEXT: addi a4, a3, 1 +; RV64IA-NEXT: neg a2, a2 +; RV64IA-NEXT: and a4, a2, a4 ; RV64IA-NEXT: .LBB3_3: # %atomicrmw.start ; RV64IA-NEXT: # Parent Loop BB3_1 Depth=1 ; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 @@ -725,10 +725,10 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { ; RV32IA-LABEL: atomicrmw_udec_wrap_i8: ; RV32IA: # %bb.0: ; RV32IA-NEXT: andi a2, a0, -4 +; RV32IA-NEXT: lw a6, 0(a2) ; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: li a4, 255 ; RV32IA-NEXT: andi a0, a3, 24 -; RV32IA-NEXT: lw a6, 0(a2) ; RV32IA-NEXT: sll a3, a4, a3 ; RV32IA-NEXT: not a3, a3 ; RV32IA-NEXT: zext.b a4, a1 @@ -826,33 +826,33 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { ; RV64IA-LABEL: atomicrmw_udec_wrap_i8: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a2, a0, -4 +; RV64IA-NEXT: lw a3, 0(a2) ; RV64IA-NEXT: slli a4, a0, 3 ; 
RV64IA-NEXT: li a5, 255 ; RV64IA-NEXT: andi a0, a4, 24 -; RV64IA-NEXT: lw a3, 0(a2) ; RV64IA-NEXT: sllw a4, a5, a4 ; RV64IA-NEXT: not a4, a4 ; RV64IA-NEXT: zext.b a5, a1 ; RV64IA-NEXT: j .LBB4_2 ; RV64IA-NEXT: .LBB4_1: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: Header=BB4_2 Depth=1 -; RV64IA-NEXT: sext.w a6, a3 -; RV64IA-NEXT: zext.b a7, a7 -; RV64IA-NEXT: sllw a7, a7, a0 -; RV64IA-NEXT: and a3, a3, a4 -; RV64IA-NEXT: or a7, a3, a7 +; RV64IA-NEXT: zext.b a6, a7 +; RV64IA-NEXT: sllw a6, a6, a0 +; RV64IA-NEXT: and a7, a3, a4 +; RV64IA-NEXT: sext.w t0, a3 +; RV64IA-NEXT: or a6, a7, a6 ; RV64IA-NEXT: .LBB4_5: # %atomicrmw.start ; RV64IA-NEXT: # Parent Loop BB4_2 Depth=1 ; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 ; RV64IA-NEXT: lr.w.aqrl a3, (a2) -; RV64IA-NEXT: bne a3, a6, .LBB4_7 +; RV64IA-NEXT: bne a3, t0, .LBB4_7 ; RV64IA-NEXT: # %bb.6: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: Header=BB4_5 Depth=2 -; RV64IA-NEXT: sc.w.rl t0, a7, (a2) -; RV64IA-NEXT: bnez t0, .LBB4_5 +; RV64IA-NEXT: sc.w.rl a7, a6, (a2) +; RV64IA-NEXT: bnez a7, .LBB4_5 ; RV64IA-NEXT: .LBB4_7: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: Header=BB4_2 Depth=1 -; RV64IA-NEXT: beq a3, a6, .LBB4_4 +; RV64IA-NEXT: beq a3, t0, .LBB4_4 ; RV64IA-NEXT: .LBB4_2: # %atomicrmw.start ; RV64IA-NEXT: # =>This Loop Header: Depth=1 ; RV64IA-NEXT: # Child Loop BB4_5 Depth 2 @@ -889,37 +889,37 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { ; RV32I-NEXT: .cfi_offset s1, -12 ; RV32I-NEXT: .cfi_offset s2, -16 ; RV32I-NEXT: .cfi_offset s3, -20 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lhu a3, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: j .LBB5_2 ; RV32I-NEXT: .LBB5_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB5_2 Depth=1 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: sh a3, 10(sp) ; RV32I-NEXT: 
addi a1, sp, 10 ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: lh a3, 10(sp) ; RV32I-NEXT: bnez a0, .LBB5_4 ; RV32I-NEXT: .LBB5_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: seqz a2, a0 +; RV32I-NEXT: and a0, a3, s2 +; RV32I-NEXT: seqz a1, a0 ; RV32I-NEXT: sltu a0, s3, a0 -; RV32I-NEXT: or a0, a2, a0 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: bnez a0, .LBB5_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB5_2 Depth=1 -; RV32I-NEXT: addi a2, a1, -1 +; RV32I-NEXT: addi a2, a3, -1 ; RV32I-NEXT: j .LBB5_1 ; RV32I-NEXT: .LBB5_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -937,11 +937,11 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { ; RV32IA-LABEL: atomicrmw_udec_wrap_i16: ; RV32IA: # %bb.0: ; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a4, a0, 3 ; RV32IA-NEXT: lui a3, 16 -; RV32IA-NEXT: andi a0, a4, 24 -; RV32IA-NEXT: addi a3, a3, -1 ; RV32IA-NEXT: lw a7, 0(a2) +; RV32IA-NEXT: slli a4, a0, 3 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: andi a0, a4, 24 ; RV32IA-NEXT: sll a4, a3, a4 ; RV32IA-NEXT: not a4, a4 ; RV32IA-NEXT: and a5, a1, a3 @@ -997,37 +997,37 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { ; RV64I-NEXT: .cfi_offset s1, -24 ; RV64I-NEXT: .cfi_offset s2, -32 ; RV64I-NEXT: .cfi_offset s3, -40 -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lhu a3, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addi s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: and s3, a1, s2 ; 
RV64I-NEXT: j .LBB5_2 ; RV64I-NEXT: .LBB5_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB5_2 Depth=1 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: sh a3, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: lh a3, 6(sp) ; RV64I-NEXT: bnez a0, .LBB5_4 ; RV64I-NEXT: .LBB5_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: seqz a2, a0 +; RV64I-NEXT: and a0, a3, s2 +; RV64I-NEXT: seqz a1, a0 ; RV64I-NEXT: sltu a0, s3, a0 -; RV64I-NEXT: or a0, a2, a0 -; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: bnez a0, .LBB5_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB5_2 Depth=1 -; RV64I-NEXT: addi a2, a1, -1 +; RV64I-NEXT: addi a2, a3, -1 ; RV64I-NEXT: j .LBB5_1 ; RV64I-NEXT: .LBB5_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -1045,39 +1045,39 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { ; RV64IA-LABEL: atomicrmw_udec_wrap_i16: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a2, a0, -4 +; RV64IA-NEXT: lui a4, 16 +; RV64IA-NEXT: lw a3, 0(a2) ; RV64IA-NEXT: slli a5, a0, 3 -; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addi a4, a4, -1 ; RV64IA-NEXT: andi a0, a5, 24 -; RV64IA-NEXT: addi a3, a3, -1 -; RV64IA-NEXT: lw a4, 0(a2) -; RV64IA-NEXT: sllw a5, a3, a5 +; RV64IA-NEXT: sllw a5, a4, a5 ; RV64IA-NEXT: not a5, a5 -; RV64IA-NEXT: and a6, a1, a3 +; RV64IA-NEXT: and a6, a1, a4 ; RV64IA-NEXT: j .LBB5_2 ; RV64IA-NEXT: .LBB5_1: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: Header=BB5_2 Depth=1 -; RV64IA-NEXT: sext.w a7, a4 -; RV64IA-NEXT: and t0, t0, a3 -; RV64IA-NEXT: sllw t0, t0, a0 -; 
RV64IA-NEXT: and a4, a4, a5 -; RV64IA-NEXT: or t0, a4, t0 +; RV64IA-NEXT: and a7, t0, a4 +; RV64IA-NEXT: sllw a7, a7, a0 +; RV64IA-NEXT: and t0, a3, a5 +; RV64IA-NEXT: sext.w t1, a3 +; RV64IA-NEXT: or a7, t0, a7 ; RV64IA-NEXT: .LBB5_5: # %atomicrmw.start ; RV64IA-NEXT: # Parent Loop BB5_2 Depth=1 ; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 -; RV64IA-NEXT: lr.w.aqrl a4, (a2) -; RV64IA-NEXT: bne a4, a7, .LBB5_7 +; RV64IA-NEXT: lr.w.aqrl a3, (a2) +; RV64IA-NEXT: bne a3, t1, .LBB5_7 ; RV64IA-NEXT: # %bb.6: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: Header=BB5_5 Depth=2 -; RV64IA-NEXT: sc.w.rl t1, t0, (a2) -; RV64IA-NEXT: bnez t1, .LBB5_5 +; RV64IA-NEXT: sc.w.rl t0, a7, (a2) +; RV64IA-NEXT: bnez t0, .LBB5_5 ; RV64IA-NEXT: .LBB5_7: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: Header=BB5_2 Depth=1 -; RV64IA-NEXT: beq a4, a7, .LBB5_4 +; RV64IA-NEXT: beq a3, t1, .LBB5_4 ; RV64IA-NEXT: .LBB5_2: # %atomicrmw.start ; RV64IA-NEXT: # =>This Loop Header: Depth=1 ; RV64IA-NEXT: # Child Loop BB5_5 Depth 2 -; RV64IA-NEXT: srlw a7, a4, a0 -; RV64IA-NEXT: and t0, a7, a3 +; RV64IA-NEXT: srlw a7, a3, a0 +; RV64IA-NEXT: and t0, a7, a4 ; RV64IA-NEXT: seqz t1, t0 ; RV64IA-NEXT: sltu t0, a6, t0 ; RV64IA-NEXT: or t1, t1, t0 @@ -1088,7 +1088,7 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { ; RV64IA-NEXT: addi t0, a7, -1 ; RV64IA-NEXT: j .LBB5_1 ; RV64IA-NEXT: .LBB5_4: # %atomicrmw.end -; RV64IA-NEXT: srlw a0, a4, a0 +; RV64IA-NEXT: srlw a0, a3, a0 ; RV64IA-NEXT: ret %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst ret i16 %result diff --git a/llvm/test/CodeGen/RISCV/avgceils.ll b/llvm/test/CodeGen/RISCV/avgceils.ll index 64410fad6029a..0327738d32604 100644 --- a/llvm/test/CodeGen/RISCV/avgceils.ll +++ b/llvm/test/CodeGen/RISCV/avgceils.ll @@ -131,10 +131,10 @@ define i16 @test_ext_i16(i16 %a0, i16 %a1) nounwind { define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind { ; RV32I-LABEL: test_fixed_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: or a2, a0, a1 -; RV32I-NEXT: 
xor a0, a0, a1 -; RV32I-NEXT: srai a0, a0, 1 -; RV32I-NEXT: sub a0, a2, a0 +; RV32I-NEXT: xor a2, a0, a1 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srai a2, a2, 1 +; RV32I-NEXT: sub a0, a0, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_fixed_i32: @@ -155,10 +155,10 @@ define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind { define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind { ; RV32I-LABEL: test_ext_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: or a2, a0, a1 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: srai a0, a0, 1 -; RV32I-NEXT: sub a0, a2, a0 +; RV32I-NEXT: xor a2, a0, a1 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srai a2, a2, 1 +; RV32I-NEXT: sub a0, a0, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_ext_i32: @@ -181,26 +181,26 @@ define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind { define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind { ; RV32I-LABEL: test_fixed_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: or a4, a1, a3 -; RV32I-NEXT: xor a1, a1, a3 -; RV32I-NEXT: xor a3, a0, a2 +; RV32I-NEXT: xor a4, a1, a3 +; RV32I-NEXT: xor a5, a0, a2 +; RV32I-NEXT: slli a6, a4, 31 +; RV32I-NEXT: srli a5, a5, 1 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a3, a5, a6 +; RV32I-NEXT: srai a4, a4, 1 ; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: srai a2, a1, 1 -; RV32I-NEXT: slli a1, a1, 31 -; RV32I-NEXT: srli a3, a3, 1 -; RV32I-NEXT: sub a4, a4, a2 -; RV32I-NEXT: or a3, a3, a1 -; RV32I-NEXT: sltu a1, a0, a3 -; RV32I-NEXT: sub a1, a4, a1 +; RV32I-NEXT: sub a1, a1, a4 +; RV32I-NEXT: sltu a2, a0, a3 +; RV32I-NEXT: sub a1, a1, a2 ; RV32I-NEXT: sub a0, a0, a3 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_fixed_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: or a2, a0, a1 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: srai a0, a0, 1 -; RV64I-NEXT: sub a0, a2, a0 +; RV64I-NEXT: xor a2, a0, a1 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srai a2, a2, 1 +; RV64I-NEXT: sub a0, a0, a2 ; RV64I-NEXT: ret %or = or i64 %a0, %a1 %xor = xor i64 %a1, %a0 @@ -212,26 +212,26 @@ define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind { 
define i64 @test_ext_i64(i64 %a0, i64 %a1) nounwind { ; RV32I-LABEL: test_ext_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: or a4, a1, a3 -; RV32I-NEXT: xor a1, a1, a3 -; RV32I-NEXT: xor a3, a0, a2 +; RV32I-NEXT: xor a4, a1, a3 +; RV32I-NEXT: xor a5, a0, a2 +; RV32I-NEXT: slli a6, a4, 31 +; RV32I-NEXT: srli a5, a5, 1 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a3, a5, a6 +; RV32I-NEXT: srai a4, a4, 1 ; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: srai a2, a1, 1 -; RV32I-NEXT: slli a1, a1, 31 -; RV32I-NEXT: srli a3, a3, 1 -; RV32I-NEXT: sub a4, a4, a2 -; RV32I-NEXT: or a3, a3, a1 -; RV32I-NEXT: sltu a1, a0, a3 -; RV32I-NEXT: sub a1, a4, a1 +; RV32I-NEXT: sub a1, a1, a4 +; RV32I-NEXT: sltu a2, a0, a3 +; RV32I-NEXT: sub a1, a1, a2 ; RV32I-NEXT: sub a0, a0, a3 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_ext_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: or a2, a0, a1 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: srai a0, a0, 1 -; RV64I-NEXT: sub a0, a2, a0 +; RV64I-NEXT: xor a2, a0, a1 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srai a2, a2, 1 +; RV64I-NEXT: sub a0, a0, a2 ; RV64I-NEXT: ret %x0 = sext i64 %a0 to i128 %x1 = sext i64 %a1 to i128 diff --git a/llvm/test/CodeGen/RISCV/avgceilu.ll b/llvm/test/CodeGen/RISCV/avgceilu.ll index 1c1d1cbfd12cb..24b24cca5dd59 100644 --- a/llvm/test/CodeGen/RISCV/avgceilu.ll +++ b/llvm/test/CodeGen/RISCV/avgceilu.ll @@ -123,10 +123,10 @@ define i16 @test_ext_i16(i16 %a0, i16 %a1) nounwind { define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind { ; RV32I-LABEL: test_fixed_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: or a2, a0, a1 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: srli a0, a0, 1 -; RV32I-NEXT: sub a0, a2, a0 +; RV32I-NEXT: xor a2, a0, a1 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a2, a2, 1 +; RV32I-NEXT: sub a0, a0, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_fixed_i32: @@ -149,10 +149,10 @@ define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind { define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind { ; RV32I-LABEL: test_ext_i32: ; RV32I: # %bb.0: -; 
RV32I-NEXT: or a2, a0, a1 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: srli a0, a0, 1 -; RV32I-NEXT: sub a0, a2, a0 +; RV32I-NEXT: xor a2, a0, a1 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a2, a2, 1 +; RV32I-NEXT: sub a0, a0, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_ext_i32: @@ -177,26 +177,26 @@ define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind { define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind { ; RV32I-LABEL: test_fixed_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: or a4, a1, a3 -; RV32I-NEXT: xor a1, a1, a3 -; RV32I-NEXT: xor a3, a0, a2 +; RV32I-NEXT: xor a4, a1, a3 +; RV32I-NEXT: xor a5, a0, a2 +; RV32I-NEXT: slli a6, a4, 31 +; RV32I-NEXT: srli a5, a5, 1 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a3, a5, a6 +; RV32I-NEXT: srli a4, a4, 1 ; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: srli a2, a1, 1 -; RV32I-NEXT: slli a1, a1, 31 -; RV32I-NEXT: srli a3, a3, 1 -; RV32I-NEXT: sub a4, a4, a2 -; RV32I-NEXT: or a3, a3, a1 -; RV32I-NEXT: sltu a1, a0, a3 -; RV32I-NEXT: sub a1, a4, a1 +; RV32I-NEXT: sub a1, a1, a4 +; RV32I-NEXT: sltu a2, a0, a3 +; RV32I-NEXT: sub a1, a1, a2 ; RV32I-NEXT: sub a0, a0, a3 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_fixed_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: or a2, a0, a1 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: sub a0, a2, a0 +; RV64I-NEXT: xor a2, a0, a1 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a2, a2, 1 +; RV64I-NEXT: sub a0, a0, a2 ; RV64I-NEXT: ret %or = or i64 %a0, %a1 %xor = xor i64 %a1, %a0 @@ -208,26 +208,26 @@ define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind { define i64 @test_ext_i64(i64 %a0, i64 %a1) nounwind { ; RV32I-LABEL: test_ext_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: or a4, a1, a3 -; RV32I-NEXT: xor a1, a1, a3 -; RV32I-NEXT: xor a3, a0, a2 +; RV32I-NEXT: xor a4, a1, a3 +; RV32I-NEXT: xor a5, a0, a2 +; RV32I-NEXT: slli a6, a4, 31 +; RV32I-NEXT: srli a5, a5, 1 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a3, a5, a6 +; RV32I-NEXT: srli a4, a4, 1 ; RV32I-NEXT: or a0, a0, a2 
-; RV32I-NEXT: srli a2, a1, 1 -; RV32I-NEXT: slli a1, a1, 31 -; RV32I-NEXT: srli a3, a3, 1 -; RV32I-NEXT: sub a4, a4, a2 -; RV32I-NEXT: or a3, a3, a1 -; RV32I-NEXT: sltu a1, a0, a3 -; RV32I-NEXT: sub a1, a4, a1 +; RV32I-NEXT: sub a1, a1, a4 +; RV32I-NEXT: sltu a2, a0, a3 +; RV32I-NEXT: sub a1, a1, a2 ; RV32I-NEXT: sub a0, a0, a3 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_ext_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: or a2, a0, a1 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: sub a0, a2, a0 +; RV64I-NEXT: xor a2, a0, a1 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a2, a2, 1 +; RV64I-NEXT: sub a0, a0, a2 ; RV64I-NEXT: ret %x0 = zext i64 %a0 to i128 %x1 = zext i64 %a1 to i128 diff --git a/llvm/test/CodeGen/RISCV/avgfloors.ll b/llvm/test/CodeGen/RISCV/avgfloors.ll index b321f4c2f2939..beca25f94fe4f 100644 --- a/llvm/test/CodeGen/RISCV/avgfloors.ll +++ b/llvm/test/CodeGen/RISCV/avgfloors.ll @@ -121,10 +121,10 @@ define i16 @test_ext_i16(i16 %a0, i16 %a1) nounwind { define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind { ; RV32I-LABEL: test_fixed_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: and a2, a0, a1 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: srai a0, a0, 1 -; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: xor a2, a0, a1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: srai a2, a2, 1 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_fixed_i32: @@ -144,10 +144,10 @@ define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind { define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind { ; RV32I-LABEL: test_ext_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: and a2, a0, a1 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: srai a0, a0, 1 -; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: xor a2, a0, a1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: srai a2, a2, 1 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_ext_i32: @@ -170,24 +170,24 @@ define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: and a4, a1, a3 ; 
RV32I-NEXT: xor a1, a1, a3 -; RV32I-NEXT: srai a3, a1, 1 -; RV32I-NEXT: add a3, a4, a3 -; RV32I-NEXT: xor a4, a0, a2 -; RV32I-NEXT: slli a1, a1, 31 -; RV32I-NEXT: srli a4, a4, 1 -; RV32I-NEXT: or a1, a4, a1 +; RV32I-NEXT: xor a3, a0, a2 +; RV32I-NEXT: slli a5, a1, 31 +; RV32I-NEXT: srli a3, a3, 1 +; RV32I-NEXT: or a3, a3, a5 ; RV32I-NEXT: and a2, a0, a2 -; RV32I-NEXT: add a0, a2, a1 -; RV32I-NEXT: sltu a1, a0, a2 -; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: srai a1, a1, 1 +; RV32I-NEXT: add a0, a2, a3 +; RV32I-NEXT: add a1, a4, a1 +; RV32I-NEXT: sltu a2, a0, a2 +; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_fixed_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: and a2, a0, a1 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: srai a0, a0, 1 -; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: xor a2, a0, a1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: srai a2, a2, 1 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: ret %and = and i64 %a0, %a1 %xor = xor i64 %a1, %a0 @@ -201,24 +201,24 @@ define i64 @test_ext_i64(i64 %a0, i64 %a1) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: and a4, a1, a3 ; RV32I-NEXT: xor a1, a1, a3 -; RV32I-NEXT: srai a3, a1, 1 -; RV32I-NEXT: add a3, a4, a3 -; RV32I-NEXT: xor a4, a0, a2 -; RV32I-NEXT: slli a1, a1, 31 -; RV32I-NEXT: srli a4, a4, 1 -; RV32I-NEXT: or a1, a4, a1 +; RV32I-NEXT: xor a3, a0, a2 +; RV32I-NEXT: slli a5, a1, 31 +; RV32I-NEXT: srli a3, a3, 1 +; RV32I-NEXT: or a3, a3, a5 ; RV32I-NEXT: and a2, a0, a2 -; RV32I-NEXT: add a0, a2, a1 -; RV32I-NEXT: sltu a1, a0, a2 -; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: srai a1, a1, 1 +; RV32I-NEXT: add a0, a2, a3 +; RV32I-NEXT: add a1, a4, a1 +; RV32I-NEXT: sltu a2, a0, a2 +; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_ext_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: and a2, a0, a1 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: srai a0, a0, 1 -; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: xor a2, a0, a1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: srai a2, a2, 1 +; RV64I-NEXT: add a0, a0, 
a2 ; RV64I-NEXT: ret %x0 = sext i64 %a0 to i128 %x1 = sext i64 %a1 to i128 diff --git a/llvm/test/CodeGen/RISCV/avgflooru.ll b/llvm/test/CodeGen/RISCV/avgflooru.ll index 2e56f3359434c..49fc7a874580c 100644 --- a/llvm/test/CodeGen/RISCV/avgflooru.ll +++ b/llvm/test/CodeGen/RISCV/avgflooru.ll @@ -113,10 +113,10 @@ define i16 @test_ext_i16(i16 %a0, i16 %a1) nounwind { define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind { ; RV32I-LABEL: test_fixed_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: and a2, a0, a1 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: srli a0, a0, 1 -; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: xor a2, a0, a1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: srli a2, a2, 1 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_fixed_i32: @@ -138,10 +138,10 @@ define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind { define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind { ; RV32I-LABEL: test_ext_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: and a2, a0, a1 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: srli a0, a0, 1 -; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: xor a2, a0, a1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: srli a2, a2, 1 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_ext_i32: @@ -164,8 +164,8 @@ define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind { define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind { ; RV32I-LABEL: test_fixed_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: add a1, a3, a1 ; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: add a1, a3, a1 ; RV32I-NEXT: sltu a2, a0, a2 ; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: beq a1, a3, .LBB6_2 @@ -182,10 +182,10 @@ define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind { ; ; RV64I-LABEL: test_fixed_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: and a2, a0, a1 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: xor a2, a0, a1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: srli a2, a2, 1 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: ret %and = and i64 %a0, 
%a1 %xor = xor i64 %a1, %a0 @@ -197,8 +197,8 @@ define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind { define i64 @test_ext_i64(i64 %a0, i64 %a1) nounwind { ; RV32I-LABEL: test_ext_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: add a1, a3, a1 ; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: add a1, a3, a1 ; RV32I-NEXT: sltu a2, a0, a2 ; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: beq a1, a3, .LBB7_2 @@ -215,10 +215,10 @@ define i64 @test_ext_i64(i64 %a0, i64 %a1) nounwind { ; ; RV64I-LABEL: test_ext_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: and a2, a0, a1 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: xor a2, a0, a1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: srli a2, a2, 1 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: ret %x0 = zext i64 %a0 to i128 %x1 = zext i64 %a1 to i128 diff --git a/llvm/test/CodeGen/RISCV/bf16-promote.ll b/llvm/test/CodeGen/RISCV/bf16-promote.ll index 08c053fab4f67..b3f04975d04c4 100644 --- a/llvm/test/CodeGen/RISCV/bf16-promote.ll +++ b/llvm/test/CodeGen/RISCV/bf16-promote.ll @@ -111,12 +111,12 @@ define void @test_fadd(ptr %p, ptr %q) nounwind { ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill ; RV64-NEXT: mv s0, a0 -; RV64-NEXT: lhu a0, 0(a1) -; RV64-NEXT: lhu a1, 0(s0) -; RV64-NEXT: slli a0, a0, 16 +; RV64-NEXT: lhu a0, 0(a0) +; RV64-NEXT: lhu a1, 0(a1) ; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: fmv.w.x fa5, a0 -; RV64-NEXT: fmv.w.x fa4, a1 +; RV64-NEXT: slli a0, a0, 16 +; RV64-NEXT: fmv.w.x fa5, a1 +; RV64-NEXT: fmv.w.x fa4, a0 ; RV64-NEXT: fadd.s fa0, fa4, fa5 ; RV64-NEXT: call __truncsfbf2 ; RV64-NEXT: fmv.x.w a0, fa0 @@ -132,12 +132,12 @@ define void @test_fadd(ptr %p, ptr %q) nounwind { ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 -; RV32-NEXT: lhu a0, 0(a1) -; RV32-NEXT: lhu a1, 0(s0) -; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: lhu a0, 0(a0) +; RV32-NEXT: lhu a1, 0(a1) ; 
RV32-NEXT: slli a1, a1, 16 -; RV32-NEXT: fmv.w.x fa5, a0 -; RV32-NEXT: fmv.w.x fa4, a1 +; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: fmv.w.x fa5, a1 +; RV32-NEXT: fmv.w.x fa4, a0 ; RV32-NEXT: fadd.s fa0, fa4, fa5 ; RV32-NEXT: call __truncsfbf2 ; RV32-NEXT: fmv.x.w a0, fa0 diff --git a/llvm/test/CodeGen/RISCV/bfloat-arith.ll b/llvm/test/CodeGen/RISCV/bfloat-arith.ll index 871b43e61df50..00d665a5b3641 100644 --- a/llvm/test/CodeGen/RISCV/bfloat-arith.ll +++ b/llvm/test/CodeGen/RISCV/bfloat-arith.ll @@ -73,25 +73,25 @@ declare bfloat @llvm.copysign.bf16(bfloat, bfloat) define bfloat @fsgnj_bf16(bfloat %a, bfloat %b) nounwind { ; RV32IZFBFMIN-LABEL: fsgnj_bf16: ; RV32IZFBFMIN: # %bb.0: -; RV32IZFBFMIN-NEXT: fmv.x.h a0, fa1 -; RV32IZFBFMIN-NEXT: lui a1, 1048568 -; RV32IZFBFMIN-NEXT: and a0, a0, a1 -; RV32IZFBFMIN-NEXT: fmv.x.h a1, fa0 -; RV32IZFBFMIN-NEXT: slli a1, a1, 17 -; RV32IZFBFMIN-NEXT: srli a1, a1, 17 -; RV32IZFBFMIN-NEXT: or a0, a1, a0 +; RV32IZFBFMIN-NEXT: fmv.x.h a0, fa0 +; RV32IZFBFMIN-NEXT: fmv.x.h a1, fa1 +; RV32IZFBFMIN-NEXT: lui a2, 1048568 +; RV32IZFBFMIN-NEXT: slli a0, a0, 17 +; RV32IZFBFMIN-NEXT: and a1, a1, a2 +; RV32IZFBFMIN-NEXT: srli a0, a0, 17 +; RV32IZFBFMIN-NEXT: or a0, a0, a1 ; RV32IZFBFMIN-NEXT: fmv.h.x fa0, a0 ; RV32IZFBFMIN-NEXT: ret ; ; RV64IZFBFMIN-LABEL: fsgnj_bf16: ; RV64IZFBFMIN: # %bb.0: -; RV64IZFBFMIN-NEXT: fmv.x.h a0, fa1 -; RV64IZFBFMIN-NEXT: lui a1, 1048568 -; RV64IZFBFMIN-NEXT: and a0, a0, a1 -; RV64IZFBFMIN-NEXT: fmv.x.h a1, fa0 -; RV64IZFBFMIN-NEXT: slli a1, a1, 49 -; RV64IZFBFMIN-NEXT: srli a1, a1, 49 -; RV64IZFBFMIN-NEXT: or a0, a1, a0 +; RV64IZFBFMIN-NEXT: fmv.x.h a0, fa0 +; RV64IZFBFMIN-NEXT: fmv.x.h a1, fa1 +; RV64IZFBFMIN-NEXT: lui a2, 1048568 +; RV64IZFBFMIN-NEXT: slli a0, a0, 49 +; RV64IZFBFMIN-NEXT: and a1, a1, a2 +; RV64IZFBFMIN-NEXT: srli a0, a0, 49 +; RV64IZFBFMIN-NEXT: or a0, a0, a1 ; RV64IZFBFMIN-NEXT: fmv.h.x fa0, a0 ; RV64IZFBFMIN-NEXT: ret %1 = call bfloat @llvm.copysign.bf16(bfloat %a, bfloat %b) @@ -102,11 
+102,11 @@ define i32 @fneg_bf16(bfloat %a, bfloat %b) nounwind { ; CHECK-LABEL: fneg_bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: lui a0, 1048568 ; CHECK-NEXT: fadd.s fa5, fa5, fa5 ; CHECK-NEXT: fcvt.bf16.s fa5, fa5 -; CHECK-NEXT: fmv.x.h a1, fa5 -; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: fmv.x.h a0, fa5 +; CHECK-NEXT: lui a1, 1048568 +; CHECK-NEXT: xor a0, a0, a1 ; CHECK-NEXT: fmv.h.x fa4, a0 ; CHECK-NEXT: fcvt.s.bf16 fa4, fa4 ; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 @@ -124,14 +124,14 @@ define bfloat @fsgnjn_bf16(bfloat %a, bfloat %b) nounwind { ; RV32IZFBFMIN: # %bb.0: ; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 ; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 -; RV32IZFBFMIN-NEXT: lui a0, 1048568 ; RV32IZFBFMIN-NEXT: fadd.s fa5, fa4, fa5 ; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5 -; RV32IZFBFMIN-NEXT: fmv.x.h a1, fa5 -; RV32IZFBFMIN-NEXT: not a1, a1 -; RV32IZFBFMIN-NEXT: and a0, a1, a0 +; RV32IZFBFMIN-NEXT: fmv.x.h a0, fa5 ; RV32IZFBFMIN-NEXT: fmv.x.h a1, fa0 +; RV32IZFBFMIN-NEXT: not a0, a0 +; RV32IZFBFMIN-NEXT: lui a2, 1048568 ; RV32IZFBFMIN-NEXT: slli a1, a1, 17 +; RV32IZFBFMIN-NEXT: and a0, a0, a2 ; RV32IZFBFMIN-NEXT: srli a1, a1, 17 ; RV32IZFBFMIN-NEXT: or a0, a1, a0 ; RV32IZFBFMIN-NEXT: fmv.h.x fa0, a0 @@ -141,14 +141,14 @@ define bfloat @fsgnjn_bf16(bfloat %a, bfloat %b) nounwind { ; RV64IZFBFMIN: # %bb.0: ; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1 ; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 -; RV64IZFBFMIN-NEXT: lui a0, 1048568 ; RV64IZFBFMIN-NEXT: fadd.s fa5, fa4, fa5 ; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5 -; RV64IZFBFMIN-NEXT: fmv.x.h a1, fa5 -; RV64IZFBFMIN-NEXT: not a1, a1 -; RV64IZFBFMIN-NEXT: and a0, a1, a0 +; RV64IZFBFMIN-NEXT: fmv.x.h a0, fa5 ; RV64IZFBFMIN-NEXT: fmv.x.h a1, fa0 +; RV64IZFBFMIN-NEXT: not a0, a0 +; RV64IZFBFMIN-NEXT: lui a2, 1048568 ; RV64IZFBFMIN-NEXT: slli a1, a1, 49 +; RV64IZFBFMIN-NEXT: and a0, a0, a2 ; RV64IZFBFMIN-NEXT: srli a1, a1, 49 ; RV64IZFBFMIN-NEXT: or a0, a1, a0 ; RV64IZFBFMIN-NEXT: fmv.h.x fa0, a0 
@@ -247,16 +247,16 @@ define bfloat @fmsub_bf16(bfloat %a, bfloat %b, bfloat %c) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: fcvt.s.bf16 fa5, fa2 ; CHECK-NEXT: fmv.w.x fa4, zero -; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: fcvt.s.bf16 fa3, fa1 ; CHECK-NEXT: fadd.s fa5, fa5, fa4 ; CHECK-NEXT: fcvt.bf16.s fa5, fa5 -; CHECK-NEXT: fmv.x.h a1, fa5 -; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: fmv.x.h a0, fa5 +; CHECK-NEXT: lui a1, 1048568 +; CHECK-NEXT: xor a0, a0, a1 ; CHECK-NEXT: fmv.h.x fa5, a0 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa1 ; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 -; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 -; CHECK-NEXT: fmadd.s fa5, fa4, fa3, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa3, fa0 +; CHECK-NEXT: fmadd.s fa5, fa3, fa4, fa5 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5 ; CHECK-NEXT: ret %c_ = fadd bfloat 0.0, %c ; avoid negation using xor @@ -268,24 +268,24 @@ define bfloat @fmsub_bf16(bfloat %a, bfloat %b, bfloat %c) nounwind { define bfloat @fnmadd_bf16(bfloat %a, bfloat %b, bfloat %c) nounwind { ; CHECK-LABEL: fnmadd_bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: fmv.w.x fa4, zero -; CHECK-NEXT: fcvt.s.bf16 fa3, fa2 -; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: fadd.s fa5, fa5, fa4 -; CHECK-NEXT: fadd.s fa4, fa3, fa4 -; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fmv.w.x fa5, zero +; CHECK-NEXT: fcvt.s.bf16 fa4, fa2 +; CHECK-NEXT: fcvt.s.bf16 fa3, fa0 +; CHECK-NEXT: fadd.s fa4, fa4, fa5 +; CHECK-NEXT: fadd.s fa5, fa3, fa5 ; CHECK-NEXT: fcvt.bf16.s fa4, fa4 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fmv.x.h a0, fa4 ; CHECK-NEXT: fmv.x.h a1, fa5 -; CHECK-NEXT: fmv.x.h a2, fa4 -; CHECK-NEXT: xor a1, a1, a0 -; CHECK-NEXT: xor a0, a2, a0 -; CHECK-NEXT: fmv.h.x fa5, a1 -; CHECK-NEXT: fmv.h.x fa4, a0 -; CHECK-NEXT: fcvt.s.bf16 fa4, fa4 +; CHECK-NEXT: lui a2, 1048568 +; CHECK-NEXT: xor a0, a0, a2 +; CHECK-NEXT: xor a1, a1, a2 +; CHECK-NEXT: fmv.h.x fa5, a0 +; CHECK-NEXT: fmv.h.x fa4, a1 ; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 
fa4, fa4 ; CHECK-NEXT: fcvt.s.bf16 fa3, fa1 -; CHECK-NEXT: fmadd.s fa5, fa5, fa3, fa4 +; CHECK-NEXT: fmadd.s fa5, fa4, fa3, fa5 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5 ; CHECK-NEXT: ret %a_ = fadd bfloat 0.0, %a @@ -299,24 +299,24 @@ define bfloat @fnmadd_bf16(bfloat %a, bfloat %b, bfloat %c) nounwind { define bfloat @fnmadd_s_2(bfloat %a, bfloat %b, bfloat %c) nounwind { ; CHECK-LABEL: fnmadd_s_2: ; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 -; CHECK-NEXT: fmv.w.x fa4, zero -; CHECK-NEXT: fcvt.s.bf16 fa3, fa2 -; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: fadd.s fa5, fa5, fa4 -; CHECK-NEXT: fadd.s fa4, fa3, fa4 -; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fmv.w.x fa5, zero +; CHECK-NEXT: fcvt.s.bf16 fa4, fa2 +; CHECK-NEXT: fcvt.s.bf16 fa3, fa1 +; CHECK-NEXT: fadd.s fa4, fa4, fa5 +; CHECK-NEXT: fadd.s fa5, fa3, fa5 ; CHECK-NEXT: fcvt.bf16.s fa4, fa4 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fmv.x.h a0, fa4 ; CHECK-NEXT: fmv.x.h a1, fa5 -; CHECK-NEXT: fmv.x.h a2, fa4 -; CHECK-NEXT: xor a1, a1, a0 -; CHECK-NEXT: xor a0, a2, a0 -; CHECK-NEXT: fmv.h.x fa5, a1 -; CHECK-NEXT: fmv.h.x fa4, a0 -; CHECK-NEXT: fcvt.s.bf16 fa4, fa4 +; CHECK-NEXT: lui a2, 1048568 +; CHECK-NEXT: xor a0, a0, a2 +; CHECK-NEXT: xor a1, a1, a2 +; CHECK-NEXT: fmv.h.x fa5, a0 +; CHECK-NEXT: fmv.h.x fa4, a1 ; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa4 ; CHECK-NEXT: fcvt.s.bf16 fa3, fa0 -; CHECK-NEXT: fmadd.s fa5, fa3, fa5, fa4 +; CHECK-NEXT: fmadd.s fa5, fa3, fa4, fa5 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5 ; CHECK-NEXT: ret %b_ = fadd bfloat 0.0, %b @@ -369,16 +369,16 @@ define bfloat @fnmsub_bf16(bfloat %a, bfloat %b, bfloat %c) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 ; CHECK-NEXT: fmv.w.x fa4, zero -; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: fcvt.s.bf16 fa3, fa2 ; CHECK-NEXT: fadd.s fa5, fa5, fa4 ; CHECK-NEXT: fcvt.bf16.s fa5, fa5 -; CHECK-NEXT: fmv.x.h a1, fa5 -; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: fmv.x.h a0, fa5 +; 
CHECK-NEXT: lui a1, 1048568 +; CHECK-NEXT: xor a0, a0, a1 ; CHECK-NEXT: fmv.h.x fa5, a0 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa2 ; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 -; CHECK-NEXT: fcvt.s.bf16 fa4, fa1 -; CHECK-NEXT: fmadd.s fa5, fa5, fa4, fa3 +; CHECK-NEXT: fcvt.s.bf16 fa3, fa1 +; CHECK-NEXT: fmadd.s fa5, fa5, fa3, fa4 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5 ; CHECK-NEXT: ret %a_ = fadd bfloat 0.0, %a @@ -392,16 +392,16 @@ define bfloat @fnmsub_bf16_2(bfloat %a, bfloat %b, bfloat %c) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 ; CHECK-NEXT: fmv.w.x fa4, zero -; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: fcvt.s.bf16 fa3, fa2 ; CHECK-NEXT: fadd.s fa5, fa5, fa4 ; CHECK-NEXT: fcvt.bf16.s fa5, fa5 -; CHECK-NEXT: fmv.x.h a1, fa5 -; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: fmv.x.h a0, fa5 +; CHECK-NEXT: lui a1, 1048568 +; CHECK-NEXT: xor a0, a0, a1 ; CHECK-NEXT: fmv.h.x fa5, a0 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa2 ; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 -; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 -; CHECK-NEXT: fmadd.s fa5, fa4, fa5, fa3 +; CHECK-NEXT: fcvt.s.bf16 fa3, fa0 +; CHECK-NEXT: fmadd.s fa5, fa3, fa5, fa4 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5 ; CHECK-NEXT: ret %b_ = fadd bfloat 0.0, %b @@ -452,27 +452,27 @@ define bfloat @fmsub_bf16_contract(bfloat %a, bfloat %b, bfloat %c) nounwind { define bfloat @fnmadd_bf16_contract(bfloat %a, bfloat %b, bfloat %c) nounwind { ; CHECK-LABEL: fnmadd_bf16_contract: ; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: fmv.w.x fa4, zero -; CHECK-NEXT: fcvt.s.bf16 fa3, fa1 -; CHECK-NEXT: fcvt.s.bf16 fa2, fa2 -; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: fadd.s fa5, fa5, fa4 -; CHECK-NEXT: fadd.s fa3, fa3, fa4 -; CHECK-NEXT: fadd.s fa4, fa2, fa4 -; CHECK-NEXT: fcvt.bf16.s fa5, fa5 -; CHECK-NEXT: fcvt.bf16.s fa3, fa3 +; CHECK-NEXT: fmv.w.x fa5, zero +; CHECK-NEXT: fcvt.s.bf16 fa4, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa3, fa0 +; CHECK-NEXT: fadd.s fa4, fa4, fa5 +; CHECK-NEXT: fadd.s fa3, fa3, fa5 ; CHECK-NEXT: fcvt.bf16.s fa4, fa4 
+; CHECK-NEXT: fcvt.bf16.s fa3, fa3 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa4 ; CHECK-NEXT: fcvt.s.bf16 fa3, fa3 -; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 -; CHECK-NEXT: fmul.s fa5, fa5, fa3 +; CHECK-NEXT: fmul.s fa4, fa3, fa4 +; CHECK-NEXT: fcvt.bf16.s fa4, fa4 +; CHECK-NEXT: fcvt.s.bf16 fa3, fa2 +; CHECK-NEXT: fmv.x.h a0, fa4 +; CHECK-NEXT: fadd.s fa5, fa3, fa5 +; CHECK-NEXT: lui a1, 1048568 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: fmv.h.x fa4, a0 ; CHECK-NEXT: fcvt.bf16.s fa5, fa5 -; CHECK-NEXT: fmv.x.h a1, fa5 -; CHECK-NEXT: xor a0, a1, a0 -; CHECK-NEXT: fmv.h.x fa5, a0 -; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 ; CHECK-NEXT: fcvt.s.bf16 fa4, fa4 -; CHECK-NEXT: fsub.s fa5, fa5, fa4 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fsub.s fa5, fa4, fa5 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5 ; CHECK-NEXT: ret %a_ = fadd bfloat 0.0, %a ; avoid negation using xor @@ -487,13 +487,13 @@ define bfloat @fnmadd_bf16_contract(bfloat %a, bfloat %b, bfloat %c) nounwind { define bfloat @fnmsub_bf16_contract(bfloat %a, bfloat %b, bfloat %c) nounwind { ; CHECK-LABEL: fnmsub_bf16_contract: ; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: fmv.w.x fa4, zero -; CHECK-NEXT: fcvt.s.bf16 fa3, fa1 -; CHECK-NEXT: fadd.s fa5, fa5, fa4 -; CHECK-NEXT: fadd.s fa4, fa3, fa4 -; CHECK-NEXT: fcvt.bf16.s fa5, fa5 +; CHECK-NEXT: fmv.w.x fa5, zero +; CHECK-NEXT: fcvt.s.bf16 fa4, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa3, fa0 +; CHECK-NEXT: fadd.s fa4, fa4, fa5 +; CHECK-NEXT: fadd.s fa5, fa3, fa5 ; CHECK-NEXT: fcvt.bf16.s fa4, fa4 +; CHECK-NEXT: fcvt.bf16.s fa5, fa5 ; CHECK-NEXT: fcvt.s.bf16 fa4, fa4 ; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 ; CHECK-NEXT: fmul.s fa5, fa5, fa4 diff --git a/llvm/test/CodeGen/RISCV/bfloat-convert.ll b/llvm/test/CodeGen/RISCV/bfloat-convert.ll index 73ff888e44b3b..1acb495566908 100644 --- a/llvm/test/CodeGen/RISCV/bfloat-convert.ll +++ b/llvm/test/CodeGen/RISCV/bfloat-convert.ll @@ -50,69 +50,69 @@ define i16 @fcvt_si_bf16(bfloat %a) nounwind { define i16 
@fcvt_si_bf16_sat(bfloat %a) nounwind { ; CHECK32ZFBFMIN-LABEL: fcvt_si_bf16_sat: ; CHECK32ZFBFMIN: # %bb.0: # %start -; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 ; CHECK32ZFBFMIN-NEXT: lui a0, 815104 -; CHECK32ZFBFMIN-NEXT: lui a1, 290816 +; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 ; CHECK32ZFBFMIN-NEXT: fmv.w.x fa4, a0 +; CHECK32ZFBFMIN-NEXT: lui a0, 290816 +; CHECK32ZFBFMIN-NEXT: addi a0, a0, -512 +; CHECK32ZFBFMIN-NEXT: fmax.s fa4, fa5, fa4 +; CHECK32ZFBFMIN-NEXT: fmv.w.x fa3, a0 +; CHECK32ZFBFMIN-NEXT: fmin.s fa4, fa4, fa3 ; CHECK32ZFBFMIN-NEXT: feq.s a0, fa5, fa5 -; CHECK32ZFBFMIN-NEXT: addi a1, a1, -512 +; CHECK32ZFBFMIN-NEXT: fcvt.w.s a1, fa4, rtz ; CHECK32ZFBFMIN-NEXT: neg a0, a0 -; CHECK32ZFBFMIN-NEXT: fmax.s fa5, fa5, fa4 -; CHECK32ZFBFMIN-NEXT: fmv.w.x fa4, a1 -; CHECK32ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4 -; CHECK32ZFBFMIN-NEXT: fcvt.w.s a1, fa5, rtz ; CHECK32ZFBFMIN-NEXT: and a0, a0, a1 ; CHECK32ZFBFMIN-NEXT: ret ; ; RV32ID-LABEL: fcvt_si_bf16_sat: ; RV32ID: # %bb.0: # %start ; RV32ID-NEXT: fmv.x.w a0, fa0 -; RV32ID-NEXT: lui a1, 815104 -; RV32ID-NEXT: fmv.w.x fa5, a1 -; RV32ID-NEXT: lui a1, 290816 ; RV32ID-NEXT: slli a0, a0, 16 -; RV32ID-NEXT: addi a1, a1, -512 -; RV32ID-NEXT: fmv.w.x fa4, a0 -; RV32ID-NEXT: feq.s a0, fa4, fa4 -; RV32ID-NEXT: fmax.s fa5, fa4, fa5 +; RV32ID-NEXT: lui a1, 815104 +; RV32ID-NEXT: fmv.w.x fa5, a0 ; RV32ID-NEXT: fmv.w.x fa4, a1 +; RV32ID-NEXT: lui a0, 290816 +; RV32ID-NEXT: addi a0, a0, -512 +; RV32ID-NEXT: fmax.s fa4, fa5, fa4 +; RV32ID-NEXT: fmv.w.x fa3, a0 +; RV32ID-NEXT: fmin.s fa4, fa4, fa3 +; RV32ID-NEXT: feq.s a0, fa5, fa5 +; RV32ID-NEXT: fcvt.w.s a1, fa4, rtz ; RV32ID-NEXT: neg a0, a0 -; RV32ID-NEXT: fmin.s fa5, fa5, fa4 -; RV32ID-NEXT: fcvt.w.s a1, fa5, rtz ; RV32ID-NEXT: and a0, a0, a1 ; RV32ID-NEXT: ret ; ; CHECK64ZFBFMIN-LABEL: fcvt_si_bf16_sat: ; CHECK64ZFBFMIN: # %bb.0: # %start -; CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 ; CHECK64ZFBFMIN-NEXT: lui a0, 815104 -; CHECK64ZFBFMIN-NEXT: lui a1, 290816 +; 
CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 ; CHECK64ZFBFMIN-NEXT: fmv.w.x fa4, a0 +; CHECK64ZFBFMIN-NEXT: lui a0, 290816 +; CHECK64ZFBFMIN-NEXT: addi a0, a0, -512 +; CHECK64ZFBFMIN-NEXT: fmax.s fa4, fa5, fa4 +; CHECK64ZFBFMIN-NEXT: fmv.w.x fa3, a0 +; CHECK64ZFBFMIN-NEXT: fmin.s fa4, fa4, fa3 ; CHECK64ZFBFMIN-NEXT: feq.s a0, fa5, fa5 -; CHECK64ZFBFMIN-NEXT: addi a1, a1, -512 +; CHECK64ZFBFMIN-NEXT: fcvt.l.s a1, fa4, rtz ; CHECK64ZFBFMIN-NEXT: neg a0, a0 -; CHECK64ZFBFMIN-NEXT: fmax.s fa5, fa5, fa4 -; CHECK64ZFBFMIN-NEXT: fmv.w.x fa4, a1 -; CHECK64ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4 -; CHECK64ZFBFMIN-NEXT: fcvt.l.s a1, fa5, rtz ; CHECK64ZFBFMIN-NEXT: and a0, a0, a1 ; CHECK64ZFBFMIN-NEXT: ret ; ; RV64ID-LABEL: fcvt_si_bf16_sat: ; RV64ID: # %bb.0: # %start ; RV64ID-NEXT: fmv.x.w a0, fa0 -; RV64ID-NEXT: lui a1, 815104 -; RV64ID-NEXT: fmv.w.x fa5, a1 -; RV64ID-NEXT: lui a1, 290816 ; RV64ID-NEXT: slli a0, a0, 16 -; RV64ID-NEXT: addi a1, a1, -512 -; RV64ID-NEXT: fmv.w.x fa4, a0 -; RV64ID-NEXT: feq.s a0, fa4, fa4 -; RV64ID-NEXT: fmax.s fa5, fa4, fa5 +; RV64ID-NEXT: lui a1, 815104 +; RV64ID-NEXT: fmv.w.x fa5, a0 ; RV64ID-NEXT: fmv.w.x fa4, a1 +; RV64ID-NEXT: lui a0, 290816 +; RV64ID-NEXT: addi a0, a0, -512 +; RV64ID-NEXT: fmax.s fa4, fa5, fa4 +; RV64ID-NEXT: fmv.w.x fa3, a0 +; RV64ID-NEXT: fmin.s fa4, fa4, fa3 +; RV64ID-NEXT: feq.s a0, fa5, fa5 +; RV64ID-NEXT: fcvt.l.s a1, fa4, rtz ; RV64ID-NEXT: neg a0, a0 -; RV64ID-NEXT: fmin.s fa5, fa5, fa4 -; RV64ID-NEXT: fcvt.l.s a1, fa5, rtz ; RV64ID-NEXT: and a0, a0, a1 ; RV64ID-NEXT: ret start: @@ -159,8 +159,8 @@ define i16 @fcvt_ui_bf16_sat(bfloat %a) nounwind { ; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 ; CHECK32ZFBFMIN-NEXT: fmv.w.x fa4, zero ; CHECK32ZFBFMIN-NEXT: lui a0, 292864 -; CHECK32ZFBFMIN-NEXT: fmax.s fa5, fa5, fa4 ; CHECK32ZFBFMIN-NEXT: addi a0, a0, -256 +; CHECK32ZFBFMIN-NEXT: fmax.s fa5, fa5, fa4 ; CHECK32ZFBFMIN-NEXT: fmv.w.x fa4, a0 ; CHECK32ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK32ZFBFMIN-NEXT: fcvt.wu.s a0, fa5, 
rtz @@ -169,12 +169,12 @@ define i16 @fcvt_ui_bf16_sat(bfloat %a) nounwind { ; RV32ID-LABEL: fcvt_ui_bf16_sat: ; RV32ID: # %bb.0: # %start ; RV32ID-NEXT: fmv.x.w a0, fa0 -; RV32ID-NEXT: fmv.w.x fa5, zero ; RV32ID-NEXT: slli a0, a0, 16 -; RV32ID-NEXT: fmv.w.x fa4, a0 +; RV32ID-NEXT: fmv.w.x fa5, a0 +; RV32ID-NEXT: fmv.w.x fa4, zero ; RV32ID-NEXT: lui a0, 292864 ; RV32ID-NEXT: addi a0, a0, -256 -; RV32ID-NEXT: fmax.s fa5, fa4, fa5 +; RV32ID-NEXT: fmax.s fa5, fa5, fa4 ; RV32ID-NEXT: fmv.w.x fa4, a0 ; RV32ID-NEXT: fmin.s fa5, fa5, fa4 ; RV32ID-NEXT: fcvt.wu.s a0, fa5, rtz @@ -185,8 +185,8 @@ define i16 @fcvt_ui_bf16_sat(bfloat %a) nounwind { ; CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 ; CHECK64ZFBFMIN-NEXT: fmv.w.x fa4, zero ; CHECK64ZFBFMIN-NEXT: lui a0, 292864 -; CHECK64ZFBFMIN-NEXT: fmax.s fa5, fa5, fa4 ; CHECK64ZFBFMIN-NEXT: addi a0, a0, -256 +; CHECK64ZFBFMIN-NEXT: fmax.s fa5, fa5, fa4 ; CHECK64ZFBFMIN-NEXT: fmv.w.x fa4, a0 ; CHECK64ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK64ZFBFMIN-NEXT: fcvt.lu.s a0, fa5, rtz @@ -195,12 +195,12 @@ define i16 @fcvt_ui_bf16_sat(bfloat %a) nounwind { ; RV64ID-LABEL: fcvt_ui_bf16_sat: ; RV64ID: # %bb.0: # %start ; RV64ID-NEXT: fmv.x.w a0, fa0 -; RV64ID-NEXT: fmv.w.x fa5, zero ; RV64ID-NEXT: slli a0, a0, 16 -; RV64ID-NEXT: fmv.w.x fa4, a0 +; RV64ID-NEXT: fmv.w.x fa5, a0 +; RV64ID-NEXT: fmv.w.x fa4, zero ; RV64ID-NEXT: lui a0, 292864 ; RV64ID-NEXT: addi a0, a0, -256 -; RV64ID-NEXT: fmax.s fa5, fa4, fa5 +; RV64ID-NEXT: fmax.s fa5, fa5, fa4 ; RV64ID-NEXT: fmv.w.x fa4, a0 ; RV64ID-NEXT: fmin.s fa5, fa5, fa4 ; RV64ID-NEXT: fcvt.lu.s a0, fa5, rtz @@ -247,11 +247,11 @@ define i32 @fcvt_w_bf16_sat(bfloat %a) nounwind { ; CHECK32ZFBFMIN-LABEL: fcvt_w_bf16_sat: ; CHECK32ZFBFMIN: # %bb.0: # %start ; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK32ZFBFMIN-NEXT: fcvt.w.s a0, fa5, rtz -; CHECK32ZFBFMIN-NEXT: feq.s a1, fa5, fa5 -; CHECK32ZFBFMIN-NEXT: seqz a1, a1 -; CHECK32ZFBFMIN-NEXT: addi a1, a1, -1 -; CHECK32ZFBFMIN-NEXT: and a0, a1, a0 +; 
CHECK32ZFBFMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK32ZFBFMIN-NEXT: fcvt.w.s a1, fa5, rtz +; CHECK32ZFBFMIN-NEXT: seqz a0, a0 +; CHECK32ZFBFMIN-NEXT: addi a0, a0, -1 +; CHECK32ZFBFMIN-NEXT: and a0, a0, a1 ; CHECK32ZFBFMIN-NEXT: ret ; ; RV32ID-LABEL: fcvt_w_bf16_sat: @@ -259,21 +259,21 @@ define i32 @fcvt_w_bf16_sat(bfloat %a) nounwind { ; RV32ID-NEXT: fmv.x.w a0, fa0 ; RV32ID-NEXT: slli a0, a0, 16 ; RV32ID-NEXT: fmv.w.x fa5, a0 -; RV32ID-NEXT: fcvt.w.s a0, fa5, rtz -; RV32ID-NEXT: feq.s a1, fa5, fa5 -; RV32ID-NEXT: seqz a1, a1 -; RV32ID-NEXT: addi a1, a1, -1 -; RV32ID-NEXT: and a0, a1, a0 +; RV32ID-NEXT: feq.s a0, fa5, fa5 +; RV32ID-NEXT: fcvt.w.s a1, fa5, rtz +; RV32ID-NEXT: seqz a0, a0 +; RV32ID-NEXT: addi a0, a0, -1 +; RV32ID-NEXT: and a0, a0, a1 ; RV32ID-NEXT: ret ; ; CHECK64ZFBFMIN-LABEL: fcvt_w_bf16_sat: ; CHECK64ZFBFMIN: # %bb.0: # %start ; CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK64ZFBFMIN-NEXT: fcvt.w.s a0, fa5, rtz -; CHECK64ZFBFMIN-NEXT: feq.s a1, fa5, fa5 -; CHECK64ZFBFMIN-NEXT: seqz a1, a1 -; CHECK64ZFBFMIN-NEXT: addi a1, a1, -1 -; CHECK64ZFBFMIN-NEXT: and a0, a1, a0 +; CHECK64ZFBFMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK64ZFBFMIN-NEXT: fcvt.w.s a1, fa5, rtz +; CHECK64ZFBFMIN-NEXT: seqz a0, a0 +; CHECK64ZFBFMIN-NEXT: addi a0, a0, -1 +; CHECK64ZFBFMIN-NEXT: and a0, a0, a1 ; CHECK64ZFBFMIN-NEXT: ret ; ; RV64ID-LABEL: fcvt_w_bf16_sat: @@ -281,11 +281,11 @@ define i32 @fcvt_w_bf16_sat(bfloat %a) nounwind { ; RV64ID-NEXT: fmv.x.w a0, fa0 ; RV64ID-NEXT: slli a0, a0, 16 ; RV64ID-NEXT: fmv.w.x fa5, a0 -; RV64ID-NEXT: fcvt.w.s a0, fa5, rtz -; RV64ID-NEXT: feq.s a1, fa5, fa5 -; RV64ID-NEXT: seqz a1, a1 -; RV64ID-NEXT: addi a1, a1, -1 -; RV64ID-NEXT: and a0, a1, a0 +; RV64ID-NEXT: feq.s a0, fa5, fa5 +; RV64ID-NEXT: fcvt.w.s a1, fa5, rtz +; RV64ID-NEXT: seqz a0, a0 +; RV64ID-NEXT: addi a0, a0, -1 +; RV64ID-NEXT: and a0, a0, a1 ; RV64ID-NEXT: ret start: %0 = tail call i32 @llvm.fptosi.sat.i32.bf16(bfloat %a) @@ -371,11 +371,11 @@ define i32 
@fcvt_wu_bf16_sat(bfloat %a) nounwind { ; CHECK32ZFBFMIN-LABEL: fcvt_wu_bf16_sat: ; CHECK32ZFBFMIN: # %bb.0: # %start ; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK32ZFBFMIN-NEXT: fcvt.wu.s a0, fa5, rtz -; CHECK32ZFBFMIN-NEXT: feq.s a1, fa5, fa5 -; CHECK32ZFBFMIN-NEXT: seqz a1, a1 -; CHECK32ZFBFMIN-NEXT: addi a1, a1, -1 -; CHECK32ZFBFMIN-NEXT: and a0, a1, a0 +; CHECK32ZFBFMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK32ZFBFMIN-NEXT: fcvt.wu.s a1, fa5, rtz +; CHECK32ZFBFMIN-NEXT: seqz a0, a0 +; CHECK32ZFBFMIN-NEXT: addi a0, a0, -1 +; CHECK32ZFBFMIN-NEXT: and a0, a0, a1 ; CHECK32ZFBFMIN-NEXT: ret ; ; RV32ID-LABEL: fcvt_wu_bf16_sat: @@ -383,21 +383,21 @@ define i32 @fcvt_wu_bf16_sat(bfloat %a) nounwind { ; RV32ID-NEXT: fmv.x.w a0, fa0 ; RV32ID-NEXT: slli a0, a0, 16 ; RV32ID-NEXT: fmv.w.x fa5, a0 -; RV32ID-NEXT: fcvt.wu.s a0, fa5, rtz -; RV32ID-NEXT: feq.s a1, fa5, fa5 -; RV32ID-NEXT: seqz a1, a1 -; RV32ID-NEXT: addi a1, a1, -1 -; RV32ID-NEXT: and a0, a1, a0 +; RV32ID-NEXT: feq.s a0, fa5, fa5 +; RV32ID-NEXT: fcvt.wu.s a1, fa5, rtz +; RV32ID-NEXT: seqz a0, a0 +; RV32ID-NEXT: addi a0, a0, -1 +; RV32ID-NEXT: and a0, a0, a1 ; RV32ID-NEXT: ret ; ; CHECK64ZFBFMIN-LABEL: fcvt_wu_bf16_sat: ; CHECK64ZFBFMIN: # %bb.0: # %start ; CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK64ZFBFMIN-NEXT: fcvt.wu.s a0, fa5, rtz -; CHECK64ZFBFMIN-NEXT: feq.s a1, fa5, fa5 -; CHECK64ZFBFMIN-NEXT: seqz a1, a1 -; CHECK64ZFBFMIN-NEXT: addi a1, a1, -1 -; CHECK64ZFBFMIN-NEXT: and a0, a0, a1 +; CHECK64ZFBFMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK64ZFBFMIN-NEXT: fcvt.wu.s a1, fa5, rtz +; CHECK64ZFBFMIN-NEXT: seqz a0, a0 +; CHECK64ZFBFMIN-NEXT: addi a0, a0, -1 +; CHECK64ZFBFMIN-NEXT: and a0, a1, a0 ; CHECK64ZFBFMIN-NEXT: slli a0, a0, 32 ; CHECK64ZFBFMIN-NEXT: srli a0, a0, 32 ; CHECK64ZFBFMIN-NEXT: ret @@ -407,11 +407,11 @@ define i32 @fcvt_wu_bf16_sat(bfloat %a) nounwind { ; RV64ID-NEXT: fmv.x.w a0, fa0 ; RV64ID-NEXT: slli a0, a0, 16 ; RV64ID-NEXT: fmv.w.x fa5, a0 -; RV64ID-NEXT: fcvt.wu.s a0, fa5, rtz -; 
RV64ID-NEXT: feq.s a1, fa5, fa5 -; RV64ID-NEXT: seqz a1, a1 -; RV64ID-NEXT: addi a1, a1, -1 -; RV64ID-NEXT: and a0, a0, a1 +; RV64ID-NEXT: feq.s a0, fa5, fa5 +; RV64ID-NEXT: fcvt.wu.s a1, fa5, rtz +; RV64ID-NEXT: seqz a0, a0 +; RV64ID-NEXT: addi a0, a0, -1 +; RV64ID-NEXT: and a0, a1, a0 ; RV64ID-NEXT: slli a0, a0, 32 ; RV64ID-NEXT: srli a0, a0, 32 ; RV64ID-NEXT: ret @@ -468,8 +468,8 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind { ; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZFBFMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill -; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0 ; RV32IZFBFMIN-NEXT: lui a0, 913408 +; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0 ; RV32IZFBFMIN-NEXT: fmv.w.x fa5, a0 ; RV32IZFBFMIN-NEXT: fle.s s0, fa5, fs0 ; RV32IZFBFMIN-NEXT: fmv.s fa0, fs0 @@ -490,11 +490,11 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind { ; RV32IZFBFMIN-NEXT: .LBB10_4: # %start ; RV32IZFBFMIN-NEXT: feq.s a3, fs0, fs0 ; RV32IZFBFMIN-NEXT: neg a4, s0 -; RV32IZFBFMIN-NEXT: neg a5, a1 -; RV32IZFBFMIN-NEXT: neg a3, a3 ; RV32IZFBFMIN-NEXT: and a0, a4, a0 +; RV32IZFBFMIN-NEXT: neg a1, a1 +; RV32IZFBFMIN-NEXT: neg a3, a3 +; RV32IZFBFMIN-NEXT: or a0, a1, a0 ; RV32IZFBFMIN-NEXT: and a1, a3, a2 -; RV32IZFBFMIN-NEXT: or a0, a5, a0 ; RV32IZFBFMIN-NEXT: and a0, a3, a0 ; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -508,8 +508,8 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind { ; R32IDZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; R32IDZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; R32IDZFBFMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill -; R32IDZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0 ; R32IDZFBFMIN-NEXT: lui a0, 913408 +; R32IDZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0 ; R32IDZFBFMIN-NEXT: fmv.w.x fa5, a0 ; R32IDZFBFMIN-NEXT: fle.s s0, fa5, fs0 ; R32IDZFBFMIN-NEXT: fmv.s fa0, fs0 @@ -530,11 +530,11 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) 
nounwind { ; R32IDZFBFMIN-NEXT: .LBB10_4: # %start ; R32IDZFBFMIN-NEXT: feq.s a3, fs0, fs0 ; R32IDZFBFMIN-NEXT: neg a4, s0 -; R32IDZFBFMIN-NEXT: neg a5, a1 -; R32IDZFBFMIN-NEXT: neg a3, a3 ; R32IDZFBFMIN-NEXT: and a0, a4, a0 +; R32IDZFBFMIN-NEXT: neg a1, a1 +; R32IDZFBFMIN-NEXT: neg a3, a3 +; R32IDZFBFMIN-NEXT: or a0, a1, a0 ; R32IDZFBFMIN-NEXT: and a1, a3, a2 -; R32IDZFBFMIN-NEXT: or a0, a5, a0 ; R32IDZFBFMIN-NEXT: and a0, a3, a0 ; R32IDZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; R32IDZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -550,9 +550,9 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind { ; RV32ID-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32ID-NEXT: fmv.x.w a0, fa0 ; RV32ID-NEXT: slli a0, a0, 16 +; RV32ID-NEXT: lui a1, 913408 ; RV32ID-NEXT: fmv.w.x fs0, a0 -; RV32ID-NEXT: lui a0, 913408 -; RV32ID-NEXT: fmv.w.x fa5, a0 +; RV32ID-NEXT: fmv.w.x fa5, a1 ; RV32ID-NEXT: fle.s s0, fa5, fs0 ; RV32ID-NEXT: fmv.s fa0, fs0 ; RV32ID-NEXT: call __fixsfdi @@ -571,12 +571,12 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind { ; RV32ID-NEXT: addi a2, a3, -1 ; RV32ID-NEXT: .LBB10_4: # %start ; RV32ID-NEXT: feq.s a3, fs0, fs0 -; RV32ID-NEXT: neg a4, a1 -; RV32ID-NEXT: neg a1, s0 +; RV32ID-NEXT: neg a4, s0 +; RV32ID-NEXT: neg a1, a1 +; RV32ID-NEXT: and a0, a4, a0 ; RV32ID-NEXT: neg a3, a3 -; RV32ID-NEXT: and a0, a1, a0 +; RV32ID-NEXT: or a0, a1, a0 ; RV32ID-NEXT: and a1, a3, a2 -; RV32ID-NEXT: or a0, a4, a0 ; RV32ID-NEXT: and a0, a3, a0 ; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32ID-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -587,11 +587,11 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind { ; CHECK64ZFBFMIN-LABEL: fcvt_l_bf16_sat: ; CHECK64ZFBFMIN: # %bb.0: # %start ; CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK64ZFBFMIN-NEXT: fcvt.l.s a0, fa5, rtz -; CHECK64ZFBFMIN-NEXT: feq.s a1, fa5, fa5 -; CHECK64ZFBFMIN-NEXT: seqz a1, a1 -; CHECK64ZFBFMIN-NEXT: addi a1, a1, -1 -; CHECK64ZFBFMIN-NEXT: and a0, a1, a0 +; CHECK64ZFBFMIN-NEXT: 
feq.s a0, fa5, fa5 +; CHECK64ZFBFMIN-NEXT: fcvt.l.s a1, fa5, rtz +; CHECK64ZFBFMIN-NEXT: seqz a0, a0 +; CHECK64ZFBFMIN-NEXT: addi a0, a0, -1 +; CHECK64ZFBFMIN-NEXT: and a0, a0, a1 ; CHECK64ZFBFMIN-NEXT: ret ; ; RV64ID-LABEL: fcvt_l_bf16_sat: @@ -599,11 +599,11 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind { ; RV64ID-NEXT: fmv.x.w a0, fa0 ; RV64ID-NEXT: slli a0, a0, 16 ; RV64ID-NEXT: fmv.w.x fa5, a0 -; RV64ID-NEXT: fcvt.l.s a0, fa5, rtz -; RV64ID-NEXT: feq.s a1, fa5, fa5 -; RV64ID-NEXT: seqz a1, a1 -; RV64ID-NEXT: addi a1, a1, -1 -; RV64ID-NEXT: and a0, a1, a0 +; RV64ID-NEXT: feq.s a0, fa5, fa5 +; RV64ID-NEXT: fcvt.l.s a1, fa5, rtz +; RV64ID-NEXT: seqz a0, a0 +; RV64ID-NEXT: addi a0, a0, -1 +; RV64ID-NEXT: and a0, a0, a1 ; RV64ID-NEXT: ret start: %0 = tail call i64 @llvm.fptosi.sat.i64.bf16(bfloat %a) @@ -664,13 +664,13 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind { ; RV32IZFBFMIN-NEXT: neg s0, a0 ; RV32IZFBFMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFBFMIN-NEXT: call __fixunssfdi -; RV32IZFBFMIN-NEXT: and a0, s0, a0 ; RV32IZFBFMIN-NEXT: lui a2, 391168 -; RV32IZFBFMIN-NEXT: and a1, s0, a1 ; RV32IZFBFMIN-NEXT: addi a2, a2, -1 ; RV32IZFBFMIN-NEXT: fmv.w.x fa5, a2 ; RV32IZFBFMIN-NEXT: flt.s a2, fa5, fs0 +; RV32IZFBFMIN-NEXT: and a0, s0, a0 ; RV32IZFBFMIN-NEXT: neg a2, a2 +; RV32IZFBFMIN-NEXT: and a1, s0, a1 ; RV32IZFBFMIN-NEXT: or a0, a2, a0 ; RV32IZFBFMIN-NEXT: or a1, a2, a1 ; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -691,13 +691,13 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind { ; R32IDZFBFMIN-NEXT: neg s0, a0 ; R32IDZFBFMIN-NEXT: fmv.s fa0, fs0 ; R32IDZFBFMIN-NEXT: call __fixunssfdi -; R32IDZFBFMIN-NEXT: and a0, s0, a0 ; R32IDZFBFMIN-NEXT: lui a2, 391168 -; R32IDZFBFMIN-NEXT: and a1, s0, a1 ; R32IDZFBFMIN-NEXT: addi a2, a2, -1 ; R32IDZFBFMIN-NEXT: fmv.w.x fa5, a2 ; R32IDZFBFMIN-NEXT: flt.s a2, fa5, fs0 +; R32IDZFBFMIN-NEXT: and a0, s0, a0 ; R32IDZFBFMIN-NEXT: neg a2, a2 +; R32IDZFBFMIN-NEXT: and a1, s0, a1 ; R32IDZFBFMIN-NEXT: or a0, a2, a0 
; R32IDZFBFMIN-NEXT: or a1, a2, a1 ; R32IDZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -714,13 +714,13 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind { ; RV32ID-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32ID-NEXT: fmv.x.w a0, fa0 ; RV32ID-NEXT: lui a1, 391168 -; RV32ID-NEXT: slli a0, a0, 16 ; RV32ID-NEXT: addi a1, a1, -1 -; RV32ID-NEXT: fmv.w.x fa0, a0 +; RV32ID-NEXT: slli a0, a0, 16 ; RV32ID-NEXT: fmv.w.x fa5, a1 +; RV32ID-NEXT: fmv.w.x fa0, a0 +; RV32ID-NEXT: fmv.w.x fa4, zero ; RV32ID-NEXT: flt.s a0, fa5, fa0 -; RV32ID-NEXT: fmv.w.x fa5, zero -; RV32ID-NEXT: fle.s a1, fa5, fa0 +; RV32ID-NEXT: fle.s a1, fa4, fa0 ; RV32ID-NEXT: neg s0, a0 ; RV32ID-NEXT: neg s1, a1 ; RV32ID-NEXT: call __fixunssfdi @@ -737,11 +737,11 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind { ; CHECK64ZFBFMIN-LABEL: fcvt_lu_bf16_sat: ; CHECK64ZFBFMIN: # %bb.0: # %start ; CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK64ZFBFMIN-NEXT: fcvt.lu.s a0, fa5, rtz -; CHECK64ZFBFMIN-NEXT: feq.s a1, fa5, fa5 -; CHECK64ZFBFMIN-NEXT: seqz a1, a1 -; CHECK64ZFBFMIN-NEXT: addi a1, a1, -1 -; CHECK64ZFBFMIN-NEXT: and a0, a1, a0 +; CHECK64ZFBFMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK64ZFBFMIN-NEXT: fcvt.lu.s a1, fa5, rtz +; CHECK64ZFBFMIN-NEXT: seqz a0, a0 +; CHECK64ZFBFMIN-NEXT: addi a0, a0, -1 +; CHECK64ZFBFMIN-NEXT: and a0, a0, a1 ; CHECK64ZFBFMIN-NEXT: ret ; ; RV64ID-LABEL: fcvt_lu_bf16_sat: @@ -749,11 +749,11 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind { ; RV64ID-NEXT: fmv.x.w a0, fa0 ; RV64ID-NEXT: slli a0, a0, 16 ; RV64ID-NEXT: fmv.w.x fa5, a0 -; RV64ID-NEXT: fcvt.lu.s a0, fa5, rtz -; RV64ID-NEXT: feq.s a1, fa5, fa5 -; RV64ID-NEXT: seqz a1, a1 -; RV64ID-NEXT: addi a1, a1, -1 -; RV64ID-NEXT: and a0, a1, a0 +; RV64ID-NEXT: feq.s a0, fa5, fa5 +; RV64ID-NEXT: fcvt.lu.s a1, fa5, rtz +; RV64ID-NEXT: seqz a0, a0 +; RV64ID-NEXT: addi a0, a0, -1 +; RV64ID-NEXT: and a0, a0, a1 ; RV64ID-NEXT: ret start: %0 = tail call i64 @llvm.fptoui.sat.i64.bf16(bfloat %a) @@ -1620,65 +1620,65 @@ 
define signext i8 @fcvt_w_s_i8(bfloat %a) nounwind { define signext i8 @fcvt_w_s_sat_i8(bfloat %a) nounwind { ; CHECK32ZFBFMIN-LABEL: fcvt_w_s_sat_i8: ; CHECK32ZFBFMIN: # %bb.0: # %start -; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 ; CHECK32ZFBFMIN-NEXT: lui a0, 798720 -; CHECK32ZFBFMIN-NEXT: lui a1, 274400 +; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 ; CHECK32ZFBFMIN-NEXT: fmv.w.x fa4, a0 +; CHECK32ZFBFMIN-NEXT: lui a0, 274400 +; CHECK32ZFBFMIN-NEXT: fmax.s fa4, fa5, fa4 +; CHECK32ZFBFMIN-NEXT: fmv.w.x fa3, a0 +; CHECK32ZFBFMIN-NEXT: fmin.s fa4, fa4, fa3 ; CHECK32ZFBFMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK32ZFBFMIN-NEXT: fcvt.w.s a1, fa4, rtz ; CHECK32ZFBFMIN-NEXT: neg a0, a0 -; CHECK32ZFBFMIN-NEXT: fmax.s fa5, fa5, fa4 -; CHECK32ZFBFMIN-NEXT: fmv.w.x fa4, a1 -; CHECK32ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4 -; CHECK32ZFBFMIN-NEXT: fcvt.w.s a1, fa5, rtz ; CHECK32ZFBFMIN-NEXT: and a0, a0, a1 ; CHECK32ZFBFMIN-NEXT: ret ; ; RV32ID-LABEL: fcvt_w_s_sat_i8: ; RV32ID: # %bb.0: # %start ; RV32ID-NEXT: fmv.x.w a0, fa0 -; RV32ID-NEXT: lui a1, 798720 -; RV32ID-NEXT: fmv.w.x fa5, a1 -; RV32ID-NEXT: lui a1, 274400 ; RV32ID-NEXT: slli a0, a0, 16 -; RV32ID-NEXT: fmv.w.x fa4, a0 -; RV32ID-NEXT: feq.s a0, fa4, fa4 -; RV32ID-NEXT: fmax.s fa5, fa4, fa5 +; RV32ID-NEXT: lui a1, 798720 +; RV32ID-NEXT: fmv.w.x fa5, a0 ; RV32ID-NEXT: fmv.w.x fa4, a1 +; RV32ID-NEXT: lui a0, 274400 +; RV32ID-NEXT: fmax.s fa4, fa5, fa4 +; RV32ID-NEXT: fmv.w.x fa3, a0 +; RV32ID-NEXT: fmin.s fa4, fa4, fa3 +; RV32ID-NEXT: feq.s a0, fa5, fa5 +; RV32ID-NEXT: fcvt.w.s a1, fa4, rtz ; RV32ID-NEXT: neg a0, a0 -; RV32ID-NEXT: fmin.s fa5, fa5, fa4 -; RV32ID-NEXT: fcvt.w.s a1, fa5, rtz ; RV32ID-NEXT: and a0, a0, a1 ; RV32ID-NEXT: ret ; ; CHECK64ZFBFMIN-LABEL: fcvt_w_s_sat_i8: ; CHECK64ZFBFMIN: # %bb.0: # %start -; CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 ; CHECK64ZFBFMIN-NEXT: lui a0, 798720 -; CHECK64ZFBFMIN-NEXT: lui a1, 274400 +; CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 ; CHECK64ZFBFMIN-NEXT: fmv.w.x fa4, a0 +; 
CHECK64ZFBFMIN-NEXT: lui a0, 274400 +; CHECK64ZFBFMIN-NEXT: fmax.s fa4, fa5, fa4 +; CHECK64ZFBFMIN-NEXT: fmv.w.x fa3, a0 +; CHECK64ZFBFMIN-NEXT: fmin.s fa4, fa4, fa3 ; CHECK64ZFBFMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK64ZFBFMIN-NEXT: fcvt.l.s a1, fa4, rtz ; CHECK64ZFBFMIN-NEXT: neg a0, a0 -; CHECK64ZFBFMIN-NEXT: fmax.s fa5, fa5, fa4 -; CHECK64ZFBFMIN-NEXT: fmv.w.x fa4, a1 -; CHECK64ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4 -; CHECK64ZFBFMIN-NEXT: fcvt.l.s a1, fa5, rtz ; CHECK64ZFBFMIN-NEXT: and a0, a0, a1 ; CHECK64ZFBFMIN-NEXT: ret ; ; RV64ID-LABEL: fcvt_w_s_sat_i8: ; RV64ID: # %bb.0: # %start ; RV64ID-NEXT: fmv.x.w a0, fa0 -; RV64ID-NEXT: lui a1, 798720 -; RV64ID-NEXT: fmv.w.x fa5, a1 -; RV64ID-NEXT: lui a1, 274400 ; RV64ID-NEXT: slli a0, a0, 16 -; RV64ID-NEXT: fmv.w.x fa4, a0 -; RV64ID-NEXT: feq.s a0, fa4, fa4 -; RV64ID-NEXT: fmax.s fa5, fa4, fa5 +; RV64ID-NEXT: lui a1, 798720 +; RV64ID-NEXT: fmv.w.x fa5, a0 ; RV64ID-NEXT: fmv.w.x fa4, a1 +; RV64ID-NEXT: lui a0, 274400 +; RV64ID-NEXT: fmax.s fa4, fa5, fa4 +; RV64ID-NEXT: fmv.w.x fa3, a0 +; RV64ID-NEXT: fmin.s fa4, fa4, fa3 +; RV64ID-NEXT: feq.s a0, fa5, fa5 +; RV64ID-NEXT: fcvt.l.s a1, fa4, rtz ; RV64ID-NEXT: neg a0, a0 -; RV64ID-NEXT: fmin.s fa5, fa5, fa4 -; RV64ID-NEXT: fcvt.l.s a1, fa5, rtz ; RV64ID-NEXT: and a0, a0, a1 ; RV64ID-NEXT: ret start: @@ -1734,11 +1734,11 @@ define zeroext i8 @fcvt_wu_s_sat_i8(bfloat %a) nounwind { ; RV32ID-LABEL: fcvt_wu_s_sat_i8: ; RV32ID: # %bb.0: # %start ; RV32ID-NEXT: fmv.x.w a0, fa0 -; RV32ID-NEXT: fmv.w.x fa5, zero ; RV32ID-NEXT: slli a0, a0, 16 -; RV32ID-NEXT: fmv.w.x fa4, a0 +; RV32ID-NEXT: fmv.w.x fa5, a0 +; RV32ID-NEXT: fmv.w.x fa4, zero ; RV32ID-NEXT: lui a0, 276464 -; RV32ID-NEXT: fmax.s fa5, fa4, fa5 +; RV32ID-NEXT: fmax.s fa5, fa5, fa4 ; RV32ID-NEXT: fmv.w.x fa4, a0 ; RV32ID-NEXT: fmin.s fa5, fa5, fa4 ; RV32ID-NEXT: fcvt.wu.s a0, fa5, rtz @@ -1758,11 +1758,11 @@ define zeroext i8 @fcvt_wu_s_sat_i8(bfloat %a) nounwind { ; RV64ID-LABEL: fcvt_wu_s_sat_i8: ; RV64ID: # %bb.0: # 
%start ; RV64ID-NEXT: fmv.x.w a0, fa0 -; RV64ID-NEXT: fmv.w.x fa5, zero ; RV64ID-NEXT: slli a0, a0, 16 -; RV64ID-NEXT: fmv.w.x fa4, a0 +; RV64ID-NEXT: fmv.w.x fa5, a0 +; RV64ID-NEXT: fmv.w.x fa4, zero ; RV64ID-NEXT: lui a0, 276464 -; RV64ID-NEXT: fmax.s fa5, fa4, fa5 +; RV64ID-NEXT: fmax.s fa5, fa5, fa4 ; RV64ID-NEXT: fmv.w.x fa4, a0 ; RV64ID-NEXT: fmin.s fa5, fa5, fa4 ; RV64ID-NEXT: fcvt.lu.s a0, fa5, rtz @@ -1777,11 +1777,11 @@ define zeroext i32 @fcvt_wu_bf16_sat_zext(bfloat %a) nounwind { ; CHECK32ZFBFMIN-LABEL: fcvt_wu_bf16_sat_zext: ; CHECK32ZFBFMIN: # %bb.0: # %start ; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK32ZFBFMIN-NEXT: fcvt.wu.s a0, fa5, rtz -; CHECK32ZFBFMIN-NEXT: feq.s a1, fa5, fa5 -; CHECK32ZFBFMIN-NEXT: seqz a1, a1 -; CHECK32ZFBFMIN-NEXT: addi a1, a1, -1 -; CHECK32ZFBFMIN-NEXT: and a0, a1, a0 +; CHECK32ZFBFMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK32ZFBFMIN-NEXT: fcvt.wu.s a1, fa5, rtz +; CHECK32ZFBFMIN-NEXT: seqz a0, a0 +; CHECK32ZFBFMIN-NEXT: addi a0, a0, -1 +; CHECK32ZFBFMIN-NEXT: and a0, a0, a1 ; CHECK32ZFBFMIN-NEXT: ret ; ; RV32ID-LABEL: fcvt_wu_bf16_sat_zext: @@ -1789,21 +1789,21 @@ define zeroext i32 @fcvt_wu_bf16_sat_zext(bfloat %a) nounwind { ; RV32ID-NEXT: fmv.x.w a0, fa0 ; RV32ID-NEXT: slli a0, a0, 16 ; RV32ID-NEXT: fmv.w.x fa5, a0 -; RV32ID-NEXT: fcvt.wu.s a0, fa5, rtz -; RV32ID-NEXT: feq.s a1, fa5, fa5 -; RV32ID-NEXT: seqz a1, a1 -; RV32ID-NEXT: addi a1, a1, -1 -; RV32ID-NEXT: and a0, a1, a0 +; RV32ID-NEXT: feq.s a0, fa5, fa5 +; RV32ID-NEXT: fcvt.wu.s a1, fa5, rtz +; RV32ID-NEXT: seqz a0, a0 +; RV32ID-NEXT: addi a0, a0, -1 +; RV32ID-NEXT: and a0, a0, a1 ; RV32ID-NEXT: ret ; ; CHECK64ZFBFMIN-LABEL: fcvt_wu_bf16_sat_zext: ; CHECK64ZFBFMIN: # %bb.0: # %start ; CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK64ZFBFMIN-NEXT: fcvt.wu.s a0, fa5, rtz -; CHECK64ZFBFMIN-NEXT: feq.s a1, fa5, fa5 -; CHECK64ZFBFMIN-NEXT: seqz a1, a1 -; CHECK64ZFBFMIN-NEXT: addi a1, a1, -1 -; CHECK64ZFBFMIN-NEXT: and a0, a0, a1 +; CHECK64ZFBFMIN-NEXT: feq.s 
a0, fa5, fa5 +; CHECK64ZFBFMIN-NEXT: fcvt.wu.s a1, fa5, rtz +; CHECK64ZFBFMIN-NEXT: seqz a0, a0 +; CHECK64ZFBFMIN-NEXT: addi a0, a0, -1 +; CHECK64ZFBFMIN-NEXT: and a0, a1, a0 ; CHECK64ZFBFMIN-NEXT: slli a0, a0, 32 ; CHECK64ZFBFMIN-NEXT: srli a0, a0, 32 ; CHECK64ZFBFMIN-NEXT: ret @@ -1813,11 +1813,11 @@ define zeroext i32 @fcvt_wu_bf16_sat_zext(bfloat %a) nounwind { ; RV64ID-NEXT: fmv.x.w a0, fa0 ; RV64ID-NEXT: slli a0, a0, 16 ; RV64ID-NEXT: fmv.w.x fa5, a0 -; RV64ID-NEXT: fcvt.wu.s a0, fa5, rtz -; RV64ID-NEXT: feq.s a1, fa5, fa5 -; RV64ID-NEXT: seqz a1, a1 -; RV64ID-NEXT: addi a1, a1, -1 -; RV64ID-NEXT: and a0, a0, a1 +; RV64ID-NEXT: feq.s a0, fa5, fa5 +; RV64ID-NEXT: fcvt.wu.s a1, fa5, rtz +; RV64ID-NEXT: seqz a0, a0 +; RV64ID-NEXT: addi a0, a0, -1 +; RV64ID-NEXT: and a0, a1, a0 ; RV64ID-NEXT: slli a0, a0, 32 ; RV64ID-NEXT: srli a0, a0, 32 ; RV64ID-NEXT: ret @@ -1830,11 +1830,11 @@ define signext i32 @fcvt_w_bf16_sat_sext(bfloat %a) nounwind { ; CHECK32ZFBFMIN-LABEL: fcvt_w_bf16_sat_sext: ; CHECK32ZFBFMIN: # %bb.0: # %start ; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK32ZFBFMIN-NEXT: fcvt.w.s a0, fa5, rtz -; CHECK32ZFBFMIN-NEXT: feq.s a1, fa5, fa5 -; CHECK32ZFBFMIN-NEXT: seqz a1, a1 -; CHECK32ZFBFMIN-NEXT: addi a1, a1, -1 -; CHECK32ZFBFMIN-NEXT: and a0, a1, a0 +; CHECK32ZFBFMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK32ZFBFMIN-NEXT: fcvt.w.s a1, fa5, rtz +; CHECK32ZFBFMIN-NEXT: seqz a0, a0 +; CHECK32ZFBFMIN-NEXT: addi a0, a0, -1 +; CHECK32ZFBFMIN-NEXT: and a0, a0, a1 ; CHECK32ZFBFMIN-NEXT: ret ; ; RV32ID-LABEL: fcvt_w_bf16_sat_sext: @@ -1842,21 +1842,21 @@ define signext i32 @fcvt_w_bf16_sat_sext(bfloat %a) nounwind { ; RV32ID-NEXT: fmv.x.w a0, fa0 ; RV32ID-NEXT: slli a0, a0, 16 ; RV32ID-NEXT: fmv.w.x fa5, a0 -; RV32ID-NEXT: fcvt.w.s a0, fa5, rtz -; RV32ID-NEXT: feq.s a1, fa5, fa5 -; RV32ID-NEXT: seqz a1, a1 -; RV32ID-NEXT: addi a1, a1, -1 -; RV32ID-NEXT: and a0, a1, a0 +; RV32ID-NEXT: feq.s a0, fa5, fa5 +; RV32ID-NEXT: fcvt.w.s a1, fa5, rtz +; RV32ID-NEXT: seqz 
a0, a0 +; RV32ID-NEXT: addi a0, a0, -1 +; RV32ID-NEXT: and a0, a0, a1 ; RV32ID-NEXT: ret ; ; CHECK64ZFBFMIN-LABEL: fcvt_w_bf16_sat_sext: ; CHECK64ZFBFMIN: # %bb.0: # %start ; CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK64ZFBFMIN-NEXT: fcvt.w.s a0, fa5, rtz -; CHECK64ZFBFMIN-NEXT: feq.s a1, fa5, fa5 -; CHECK64ZFBFMIN-NEXT: seqz a1, a1 -; CHECK64ZFBFMIN-NEXT: addi a1, a1, -1 -; CHECK64ZFBFMIN-NEXT: and a0, a1, a0 +; CHECK64ZFBFMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK64ZFBFMIN-NEXT: fcvt.w.s a1, fa5, rtz +; CHECK64ZFBFMIN-NEXT: seqz a0, a0 +; CHECK64ZFBFMIN-NEXT: addi a0, a0, -1 +; CHECK64ZFBFMIN-NEXT: and a0, a0, a1 ; CHECK64ZFBFMIN-NEXT: ret ; ; RV64ID-LABEL: fcvt_w_bf16_sat_sext: @@ -1864,11 +1864,11 @@ define signext i32 @fcvt_w_bf16_sat_sext(bfloat %a) nounwind { ; RV64ID-NEXT: fmv.x.w a0, fa0 ; RV64ID-NEXT: slli a0, a0, 16 ; RV64ID-NEXT: fmv.w.x fa5, a0 -; RV64ID-NEXT: fcvt.w.s a0, fa5, rtz -; RV64ID-NEXT: feq.s a1, fa5, fa5 -; RV64ID-NEXT: seqz a1, a1 -; RV64ID-NEXT: addi a1, a1, -1 -; RV64ID-NEXT: and a0, a1, a0 +; RV64ID-NEXT: feq.s a0, fa5, fa5 +; RV64ID-NEXT: fcvt.w.s a1, fa5, rtz +; RV64ID-NEXT: seqz a0, a0 +; RV64ID-NEXT: addi a0, a0, -1 +; RV64ID-NEXT: and a0, a0, a1 ; RV64ID-NEXT: ret start: %0 = tail call i32 @llvm.fptosi.sat.i32.bf16(bfloat %a) diff --git a/llvm/test/CodeGen/RISCV/bfloat-imm.ll b/llvm/test/CodeGen/RISCV/bfloat-imm.ll index 61014891414d8..3c94b7bd70ea3 100644 --- a/llvm/test/CodeGen/RISCV/bfloat-imm.ll +++ b/llvm/test/CodeGen/RISCV/bfloat-imm.ll @@ -17,8 +17,8 @@ define bfloat @bfloat_imm() nounwind { define bfloat @bfloat_imm_op(bfloat %a) nounwind { ; CHECK-LABEL: bfloat_imm_op: ; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 ; CHECK-NEXT: lui a0, 260096 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 ; CHECK-NEXT: fmv.w.x fa4, a0 ; CHECK-NEXT: fadd.s fa5, fa5, fa4 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5 diff --git a/llvm/test/CodeGen/RISCV/bfloat-mem.ll b/llvm/test/CodeGen/RISCV/bfloat-mem.ll index cccbb04e6ae99..17e582e336c25 100644 --- 
a/llvm/test/CodeGen/RISCV/bfloat-mem.ll +++ b/llvm/test/CodeGen/RISCV/bfloat-mem.ll @@ -50,10 +50,10 @@ define bfloat @flh_fsh_global(bfloat %a, bfloat %b) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 ; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 -; CHECK-NEXT: lui a0, %hi(G) -; CHECK-NEXT: addi a0, a0, %lo(G) ; CHECK-NEXT: fadd.s fa5, fa4, fa5 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: lui a0, %hi(G) +; CHECK-NEXT: addi a0, a0, %lo(G) ; CHECK-NEXT: flh fa5, 0(a0) ; CHECK-NEXT: fsh fa0, 0(a0) ; CHECK-NEXT: flh fa5, 18(a0) diff --git a/llvm/test/CodeGen/RISCV/bfloat-select-fcmp.ll b/llvm/test/CodeGen/RISCV/bfloat-select-fcmp.ll index 2f7830c9c9d8a..47b58d2dc68e5 100644 --- a/llvm/test/CodeGen/RISCV/bfloat-select-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/bfloat-select-fcmp.ll @@ -116,9 +116,9 @@ define bfloat @select_fcmp_ord(bfloat %a, bfloat %b) nounwind { ; CHECK-LABEL: select_fcmp_ord: ; CHECK: # %bb.0: ; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 ; CHECK-NEXT: feq.s a0, fa5, fa5 -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: feq.s a1, fa5, fa5 +; CHECK-NEXT: feq.s a1, fa4, fa4 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: bnez a0, .LBB7_2 ; CHECK-NEXT: # %bb.1: @@ -232,9 +232,9 @@ define bfloat @select_fcmp_uno(bfloat %a, bfloat %b) nounwind { ; CHECK-LABEL: select_fcmp_uno: ; CHECK: # %bb.0: ; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 +; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 ; CHECK-NEXT: feq.s a0, fa5, fa5 -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: feq.s a1, fa5, fa5 +; CHECK-NEXT: feq.s a1, fa4, fa4 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: beqz a0, .LBB14_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/bfloat.ll b/llvm/test/CodeGen/RISCV/bfloat.ll index c83b0ed6b0eee..e43c7f55ac612 100644 --- a/llvm/test/CodeGen/RISCV/bfloat.ll +++ b/llvm/test/CodeGen/RISCV/bfloat.ll @@ -447,12 +447,12 @@ define bfloat @bfloat_load(ptr %a) nounwind { ; RV32ID-ILP32: # %bb.0: ; RV32ID-ILP32-NEXT: addi sp, sp, -16 ; 
RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32ID-ILP32-NEXT: lhu a1, 6(a0) -; RV32ID-ILP32-NEXT: lhu a0, 0(a0) -; RV32ID-ILP32-NEXT: slli a1, a1, 16 +; RV32ID-ILP32-NEXT: lhu a1, 0(a0) +; RV32ID-ILP32-NEXT: lhu a0, 6(a0) ; RV32ID-ILP32-NEXT: slli a0, a0, 16 -; RV32ID-ILP32-NEXT: fmv.w.x fa5, a1 -; RV32ID-ILP32-NEXT: fmv.w.x fa4, a0 +; RV32ID-ILP32-NEXT: slli a1, a1, 16 +; RV32ID-ILP32-NEXT: fmv.w.x fa5, a0 +; RV32ID-ILP32-NEXT: fmv.w.x fa4, a1 ; RV32ID-ILP32-NEXT: fadd.s fa5, fa4, fa5 ; RV32ID-ILP32-NEXT: fmv.x.w a0, fa5 ; RV32ID-ILP32-NEXT: call __truncsfbf2 @@ -466,12 +466,12 @@ define bfloat @bfloat_load(ptr %a) nounwind { ; RV64ID-LP64: # %bb.0: ; RV64ID-LP64-NEXT: addi sp, sp, -16 ; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64ID-LP64-NEXT: lhu a1, 6(a0) -; RV64ID-LP64-NEXT: lhu a0, 0(a0) -; RV64ID-LP64-NEXT: slli a1, a1, 16 +; RV64ID-LP64-NEXT: lhu a1, 0(a0) +; RV64ID-LP64-NEXT: lhu a0, 6(a0) ; RV64ID-LP64-NEXT: slli a0, a0, 16 -; RV64ID-LP64-NEXT: fmv.w.x fa5, a1 -; RV64ID-LP64-NEXT: fmv.w.x fa4, a0 +; RV64ID-LP64-NEXT: slli a1, a1, 16 +; RV64ID-LP64-NEXT: fmv.w.x fa5, a0 +; RV64ID-LP64-NEXT: fmv.w.x fa4, a1 ; RV64ID-LP64-NEXT: fadd.s fa5, fa4, fa5 ; RV64ID-LP64-NEXT: fmv.x.w a0, fa5 ; RV64ID-LP64-NEXT: call __truncsfbf2 @@ -485,12 +485,12 @@ define bfloat @bfloat_load(ptr %a) nounwind { ; RV32ID-ILP32D: # %bb.0: ; RV32ID-ILP32D-NEXT: addi sp, sp, -16 ; RV32ID-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32ID-ILP32D-NEXT: lhu a1, 6(a0) -; RV32ID-ILP32D-NEXT: lhu a0, 0(a0) -; RV32ID-ILP32D-NEXT: slli a1, a1, 16 +; RV32ID-ILP32D-NEXT: lhu a1, 0(a0) +; RV32ID-ILP32D-NEXT: lhu a0, 6(a0) ; RV32ID-ILP32D-NEXT: slli a0, a0, 16 -; RV32ID-ILP32D-NEXT: fmv.w.x fa5, a1 -; RV32ID-ILP32D-NEXT: fmv.w.x fa4, a0 +; RV32ID-ILP32D-NEXT: slli a1, a1, 16 +; RV32ID-ILP32D-NEXT: fmv.w.x fa5, a0 +; RV32ID-ILP32D-NEXT: fmv.w.x fa4, a1 ; RV32ID-ILP32D-NEXT: fadd.s fa0, fa4, fa5 ; RV32ID-ILP32D-NEXT: call __truncsfbf2 ; RV32ID-ILP32D-NEXT: 
fmv.x.w a0, fa0 @@ -505,12 +505,12 @@ define bfloat @bfloat_load(ptr %a) nounwind { ; RV64ID-LP64D: # %bb.0: ; RV64ID-LP64D-NEXT: addi sp, sp, -16 ; RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64ID-LP64D-NEXT: lhu a1, 6(a0) -; RV64ID-LP64D-NEXT: lhu a0, 0(a0) -; RV64ID-LP64D-NEXT: slli a1, a1, 16 +; RV64ID-LP64D-NEXT: lhu a1, 0(a0) +; RV64ID-LP64D-NEXT: lhu a0, 6(a0) ; RV64ID-LP64D-NEXT: slli a0, a0, 16 -; RV64ID-LP64D-NEXT: fmv.w.x fa5, a1 -; RV64ID-LP64D-NEXT: fmv.w.x fa4, a0 +; RV64ID-LP64D-NEXT: slli a1, a1, 16 +; RV64ID-LP64D-NEXT: fmv.w.x fa5, a0 +; RV64ID-LP64D-NEXT: fmv.w.x fa4, a1 ; RV64ID-LP64D-NEXT: fadd.s fa0, fa4, fa5 ; RV64ID-LP64D-NEXT: call __truncsfbf2 ; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0 @@ -567,13 +567,14 @@ define void @bfloat_store(ptr %a, bfloat %b, bfloat %c) nounwind { ; RV32ID-ILP32-NEXT: addi sp, sp, -16 ; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32ID-ILP32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32ID-ILP32-NEXT: mv s0, a0 ; RV32ID-ILP32-NEXT: slli a2, a2, 16 ; RV32ID-ILP32-NEXT: slli a1, a1, 16 ; RV32ID-ILP32-NEXT: fmv.w.x fa5, a2 ; RV32ID-ILP32-NEXT: fmv.w.x fa4, a1 ; RV32ID-ILP32-NEXT: fadd.s fa5, fa4, fa5 -; RV32ID-ILP32-NEXT: fmv.x.w a0, fa5 +; RV32ID-ILP32-NEXT: fmv.x.w a1, fa5 +; RV32ID-ILP32-NEXT: mv s0, a0 +; RV32ID-ILP32-NEXT: mv a0, a1 ; RV32ID-ILP32-NEXT: call __truncsfbf2 ; RV32ID-ILP32-NEXT: sh a0, 0(s0) ; RV32ID-ILP32-NEXT: sh a0, 16(s0) @@ -587,13 +588,14 @@ define void @bfloat_store(ptr %a, bfloat %b, bfloat %c) nounwind { ; RV64ID-LP64-NEXT: addi sp, sp, -16 ; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64ID-LP64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64ID-LP64-NEXT: mv s0, a0 ; RV64ID-LP64-NEXT: slli a2, a2, 16 ; RV64ID-LP64-NEXT: slli a1, a1, 16 ; RV64ID-LP64-NEXT: fmv.w.x fa5, a2 ; RV64ID-LP64-NEXT: fmv.w.x fa4, a1 ; RV64ID-LP64-NEXT: fadd.s fa5, fa4, fa5 -; RV64ID-LP64-NEXT: fmv.x.w a0, fa5 +; RV64ID-LP64-NEXT: fmv.x.w a1, fa5 +; RV64ID-LP64-NEXT: mv s0, a0 +; 
RV64ID-LP64-NEXT: mv a0, a1 ; RV64ID-LP64-NEXT: call __truncsfbf2 ; RV64ID-LP64-NEXT: sh a0, 0(s0) ; RV64ID-LP64-NEXT: sh a0, 16(s0) @@ -607,14 +609,14 @@ define void @bfloat_store(ptr %a, bfloat %b, bfloat %c) nounwind { ; RV32ID-ILP32D-NEXT: addi sp, sp, -16 ; RV32ID-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32ID-ILP32D-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32ID-ILP32D-NEXT: mv s0, a0 -; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0 -; RV32ID-ILP32D-NEXT: fmv.x.w a1, fa1 +; RV32ID-ILP32D-NEXT: fmv.x.w a1, fa0 +; RV32ID-ILP32D-NEXT: fmv.x.w a2, fa1 +; RV32ID-ILP32D-NEXT: slli a2, a2, 16 ; RV32ID-ILP32D-NEXT: slli a1, a1, 16 -; RV32ID-ILP32D-NEXT: slli a0, a0, 16 -; RV32ID-ILP32D-NEXT: fmv.w.x fa5, a1 -; RV32ID-ILP32D-NEXT: fmv.w.x fa4, a0 +; RV32ID-ILP32D-NEXT: fmv.w.x fa5, a2 +; RV32ID-ILP32D-NEXT: fmv.w.x fa4, a1 ; RV32ID-ILP32D-NEXT: fadd.s fa0, fa4, fa5 +; RV32ID-ILP32D-NEXT: mv s0, a0 ; RV32ID-ILP32D-NEXT: call __truncsfbf2 ; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0 ; RV32ID-ILP32D-NEXT: sh a0, 0(s0) @@ -629,14 +631,14 @@ define void @bfloat_store(ptr %a, bfloat %b, bfloat %c) nounwind { ; RV64ID-LP64D-NEXT: addi sp, sp, -16 ; RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64ID-LP64D-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64ID-LP64D-NEXT: mv s0, a0 -; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0 -; RV64ID-LP64D-NEXT: fmv.x.w a1, fa1 +; RV64ID-LP64D-NEXT: fmv.x.w a1, fa0 +; RV64ID-LP64D-NEXT: fmv.x.w a2, fa1 +; RV64ID-LP64D-NEXT: slli a2, a2, 16 ; RV64ID-LP64D-NEXT: slli a1, a1, 16 -; RV64ID-LP64D-NEXT: slli a0, a0, 16 -; RV64ID-LP64D-NEXT: fmv.w.x fa5, a1 -; RV64ID-LP64D-NEXT: fmv.w.x fa4, a0 +; RV64ID-LP64D-NEXT: fmv.w.x fa5, a2 +; RV64ID-LP64D-NEXT: fmv.w.x fa4, a1 ; RV64ID-LP64D-NEXT: fadd.s fa0, fa4, fa5 +; RV64ID-LP64D-NEXT: mv s0, a0 ; RV64ID-LP64D-NEXT: call __truncsfbf2 ; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0 ; RV64ID-LP64D-NEXT: sh a0, 0(s0) diff --git a/llvm/test/CodeGen/RISCV/bittest.ll b/llvm/test/CodeGen/RISCV/bittest.ll index 
35d38524c2e9a..c92c29d217d8a 100644 --- a/llvm/test/CodeGen/RISCV/bittest.ll +++ b/llvm/test/CodeGen/RISCV/bittest.ll @@ -351,10 +351,10 @@ define i1 @bittest_constant_by_var_shr_i64(i64 %b) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: lui a1, 301408 ; RV32-NEXT: addi a1, a1, 722 -; RV32-NEXT: srl a1, a1, a0 -; RV32-NEXT: addi a0, a0, -32 -; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: addi a2, a0, -32 +; RV32-NEXT: srl a0, a1, a0 +; RV32-NEXT: srli a2, a2, 31 +; RV32-NEXT: and a0, a2, a0 ; RV32-NEXT: ret ; ; RV64I-LABEL: bittest_constant_by_var_shr_i64: @@ -391,10 +391,10 @@ define i1 @bittest_constant_by_var_shl_i64(i64 %b) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: lui a1, 301408 ; RV32-NEXT: addi a1, a1, 722 -; RV32-NEXT: srl a1, a1, a0 -; RV32-NEXT: addi a0, a0, -32 -; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: addi a2, a0, -32 +; RV32-NEXT: srl a0, a1, a0 +; RV32-NEXT: srli a2, a2, 31 +; RV32-NEXT: and a0, a2, a0 ; RV32-NEXT: ret ; ; RV64I-LABEL: bittest_constant_by_var_shl_i64: diff --git a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll index 1605e686e9177..8654ad109c4f3 100644 --- a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll @@ -23,18 +23,18 @@ declare i64 @llvm.bitreverse.i64(i64) define i16 @test_bswap_i16(i16 %a) nounwind { ; RV32I-LABEL: test_bswap_i16: ; RV32I: # %bb.0: -; RV32I-NEXT: slli a1, a0, 8 -; RV32I-NEXT: slli a0, a0, 16 -; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: srli a1, a1, 24 +; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_bswap_i16: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 8 -; RV64I-NEXT: slli a0, a0, 48 -; RV64I-NEXT: srli a0, a0, 56 -; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: slli a1, a0, 48 +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: srli a1, a1, 56 +; RV64I-NEXT: or a0, a0, a1 ; 
RV64I-NEXT: ret ; ; RV32ZB-LABEL: test_bswap_i16: @@ -55,32 +55,32 @@ define i16 @test_bswap_i16(i16 %a) nounwind { define i32 @test_bswap_i32(i32 %a) nounwind { ; RV32I-LABEL: test_bswap_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: srli a1, a0, 8 -; RV32I-NEXT: lui a2, 16 +; RV32I-NEXT: lui a1, 16 +; RV32I-NEXT: srli a2, a0, 8 +; RV32I-NEXT: addi a1, a1, -256 +; RV32I-NEXT: and a2, a2, a1 ; RV32I-NEXT: srli a3, a0, 24 -; RV32I-NEXT: addi a2, a2, -256 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: and a2, a0, a2 -; RV32I-NEXT: or a1, a1, a3 -; RV32I-NEXT: slli a2, a2, 8 +; RV32I-NEXT: and a1, a0, a1 +; RV32I-NEXT: slli a1, a1, 8 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: or a2, a2, a3 ; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: or a0, a0, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_bswap_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: srli a1, a0, 8 -; RV64I-NEXT: lui a2, 16 +; RV64I-NEXT: lui a1, 16 +; RV64I-NEXT: srli a2, a0, 8 +; RV64I-NEXT: addi a1, a1, -256 +; RV64I-NEXT: and a2, a2, a1 ; RV64I-NEXT: srliw a3, a0, 24 -; RV64I-NEXT: addi a2, a2, -256 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: and a2, a0, a2 -; RV64I-NEXT: or a1, a1, a3 -; RV64I-NEXT: slli a2, a2, 8 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: slli a1, a1, 8 ; RV64I-NEXT: slliw a0, a0, 24 -; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: or a2, a2, a3 ; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: ret ; ; RV32ZB-LABEL: test_bswap_i32: @@ -103,20 +103,20 @@ define i64 @test_bswap_i64(i64 %a) nounwind { ; RV32I-NEXT: srli a2, a1, 8 ; RV32I-NEXT: lui a3, 16 ; RV32I-NEXT: srli a4, a1, 24 -; RV32I-NEXT: srli a5, a0, 8 ; RV32I-NEXT: addi a3, a3, -256 ; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: and a5, a1, a3 ; RV32I-NEXT: or a2, a2, a4 -; RV32I-NEXT: srli a4, a0, 24 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: slli a5, a1, 24 -; RV32I-NEXT: and a1, a1, a3 -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a5, a1 +; RV32I-NEXT: slli a5, a5, 8 
+; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: or a1, a1, a5 +; RV32I-NEXT: and a4, a4, a3 +; RV32I-NEXT: srli a5, a0, 24 ; RV32I-NEXT: and a3, a0, a3 -; RV32I-NEXT: slli a0, a0, 24 ; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: or a3, a0, a3 ; RV32I-NEXT: or a0, a1, a2 ; RV32I-NEXT: or a1, a3, a4 @@ -127,27 +127,27 @@ define i64 @test_bswap_i64(i64 %a) nounwind { ; RV64I-NEXT: srli a1, a0, 40 ; RV64I-NEXT: lui a2, 16 ; RV64I-NEXT: srli a3, a0, 56 -; RV64I-NEXT: srli a4, a0, 24 -; RV64I-NEXT: lui a5, 4080 ; RV64I-NEXT: addi a2, a2, -256 +; RV64I-NEXT: srli a4, a0, 24 ; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a5, a0, 8 ; RV64I-NEXT: or a1, a1, a3 -; RV64I-NEXT: srli a3, a0, 8 -; RV64I-NEXT: and a4, a4, a5 -; RV64I-NEXT: srliw a3, a3, 24 -; RV64I-NEXT: slli a3, a3, 24 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: srliw a4, a0, 24 -; RV64I-NEXT: and a5, a0, a5 -; RV64I-NEXT: and a2, a0, a2 -; RV64I-NEXT: slli a0, a0, 56 -; RV64I-NEXT: slli a4, a4, 32 +; RV64I-NEXT: lui a3, 4080 +; RV64I-NEXT: srliw a5, a5, 24 +; RV64I-NEXT: and a4, a4, a3 ; RV64I-NEXT: slli a5, a5, 24 ; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: and a3, a0, a3 +; RV64I-NEXT: slli a3, a3, 24 +; RV64I-NEXT: srliw a5, a0, 24 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: and a2, a0, a2 ; RV64I-NEXT: slli a2, a2, 40 -; RV64I-NEXT: or a1, a3, a1 +; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: or a3, a3, a5 ; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: or a1, a4, a1 +; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; @@ -176,27 +176,27 @@ define i7 @test_bitreverse_i7(i7 %a) nounwind { ; RV32I-NEXT: and a1, a1, a2 ; RV32I-NEXT: and a2, a0, a2 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a1, a1, a3 -; RV32I-NEXT: lui a3, 61681 ; RV32I-NEXT: slli a2, a2, 8 +; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: lui a2, 209715 -; RV32I-NEXT: addi a3, a3, 
-241 +; RV32I-NEXT: lui a2, 61681 ; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: and a0, a0, a3 -; RV32I-NEXT: and a1, a1, a3 -; RV32I-NEXT: lui a3, 344064 -; RV32I-NEXT: addi a2, a2, 819 +; RV32I-NEXT: addi a1, a2, -241 +; RV32I-NEXT: srli a2, a0, 4 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: and a1, a2, a1 ; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: lui a2, 209715 ; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: srli a1, a0, 2 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: lui a2, 348160 +; RV32I-NEXT: addi a1, a2, 819 +; RV32I-NEXT: srli a2, a0, 2 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: and a1, a2, a1 ; RV32I-NEXT: slli a0, a0, 2 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 348160 +; RV32I-NEXT: lui a3, 344064 ; RV32I-NEXT: and a0, a0, a2 ; RV32I-NEXT: and a1, a1, a3 ; RV32I-NEXT: slli a0, a0, 1 @@ -209,53 +209,53 @@ define i7 @test_bitreverse_i7(i7 %a) nounwind { ; RV64I-NEXT: srli a1, a0, 40 ; RV64I-NEXT: lui a2, 16 ; RV64I-NEXT: srli a3, a0, 56 -; RV64I-NEXT: srli a4, a0, 24 -; RV64I-NEXT: lui a5, 4080 -; RV64I-NEXT: srli a6, a0, 8 -; RV64I-NEXT: srliw a7, a0, 24 ; RV64I-NEXT: addi a2, a2, -256 +; RV64I-NEXT: srli a4, a0, 24 ; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: or a1, a1, a3 -; RV64I-NEXT: lui a3, 61681 -; RV64I-NEXT: and a4, a4, a5 -; RV64I-NEXT: srliw a6, a6, 24 -; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a4 -; RV64I-NEXT: lui a6, 209715 -; RV64I-NEXT: and a5, a0, a5 -; RV64I-NEXT: slli a7, a7, 32 -; RV64I-NEXT: addi a3, a3, -241 -; RV64I-NEXT: addi a6, a6, 819 +; RV64I-NEXT: srli a5, a0, 8 +; RV64I-NEXT: lui a6, 4080 +; RV64I-NEXT: srliw a5, a5, 24 +; RV64I-NEXT: and a4, a4, a6 ; RV64I-NEXT: slli a5, a5, 24 -; RV64I-NEXT: or a5, a5, a7 -; RV64I-NEXT: slli a7, a3, 32 -; RV64I-NEXT: add a3, a3, a7 -; RV64I-NEXT: slli a7, a6, 32 -; RV64I-NEXT: add a6, a6, a7 +; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: or a4, a5, a4 ; RV64I-NEXT: or a1, a4, a1 +; 
RV64I-NEXT: and a3, a0, a6 +; RV64I-NEXT: srliw a4, a0, 24 +; RV64I-NEXT: slli a3, a3, 24 +; RV64I-NEXT: slli a4, a4, 32 +; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: and a2, a0, a2 -; RV64I-NEXT: slli a0, a0, 56 ; RV64I-NEXT: slli a2, a2, 40 +; RV64I-NEXT: slli a0, a0, 56 ; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: li a2, 21 -; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: li a4, 85 -; RV64I-NEXT: slli a2, a2, 58 -; RV64I-NEXT: slli a4, a4, 56 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: addi a2, a2, -241 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: and a0, a0, a3 -; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: slli a1, a2, 32 +; RV64I-NEXT: srli a3, a0, 4 +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: and a3, a3, a1 +; RV64I-NEXT: lui a2, 209715 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: addi a1, a2, 819 ; RV64I-NEXT: slli a0, a0, 4 -; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: srli a1, a0, 2 -; RV64I-NEXT: and a0, a0, a6 -; RV64I-NEXT: and a1, a1, a6 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: or a0, a3, a0 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: srli a2, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: slli a0, a0, 2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: and a0, a0, a4 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: li a2, 85 +; RV64I-NEXT: li a3, 21 +; RV64I-NEXT: slli a2, a2, 56 +; RV64I-NEXT: slli a3, a3, 58 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: and a1, a1, a3 ; RV64I-NEXT: slli a0, a0, 1 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: srli a0, a0, 57 @@ -270,54 +270,54 @@ define i7 @test_bitreverse_i7(i7 %a) nounwind { ; RV32ZBB-NEXT: and a2, a2, a1 ; RV32ZBB-NEXT: and a0, a0, a1 ; RV32ZBB-NEXT: lui a1, 209715 -; RV32ZBB-NEXT: addi a1, a1, 819 ; RV32ZBB-NEXT: slli a0, a0, 4 +; RV32ZBB-NEXT: addi a1, a1, 819 ; RV32ZBB-NEXT: or a0, a2, a0 ; RV32ZBB-NEXT: srli a2, a0, 2 ; RV32ZBB-NEXT: and a0, a0, a1 ; RV32ZBB-NEXT: and a1, a2, a1 -; RV32ZBB-NEXT: 
lui a2, 344064 ; RV32ZBB-NEXT: slli a0, a0, 2 ; RV32ZBB-NEXT: or a0, a1, a0 ; RV32ZBB-NEXT: lui a1, 348160 -; RV32ZBB-NEXT: and a1, a0, a1 -; RV32ZBB-NEXT: srli a0, a0, 1 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: slli a1, a1, 1 -; RV32ZBB-NEXT: or a0, a0, a1 +; RV32ZBB-NEXT: srli a2, a0, 1 +; RV32ZBB-NEXT: and a0, a0, a1 +; RV32ZBB-NEXT: lui a1, 344064 +; RV32ZBB-NEXT: and a1, a2, a1 +; RV32ZBB-NEXT: slli a0, a0, 1 +; RV32ZBB-NEXT: or a0, a1, a0 ; RV32ZBB-NEXT: srli a0, a0, 25 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: test_bitreverse_i7: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: rev8 a0, a0 ; RV64ZBB-NEXT: lui a1, 61681 -; RV64ZBB-NEXT: lui a2, 209715 +; RV64ZBB-NEXT: rev8 a0, a0 ; RV64ZBB-NEXT: addi a1, a1, -241 -; RV64ZBB-NEXT: addi a2, a2, 819 +; RV64ZBB-NEXT: lui a2, 209715 ; RV64ZBB-NEXT: slli a3, a1, 32 ; RV64ZBB-NEXT: add a1, a1, a3 -; RV64ZBB-NEXT: slli a3, a2, 32 -; RV64ZBB-NEXT: add a2, a2, a3 ; RV64ZBB-NEXT: srli a3, a0, 4 -; RV64ZBB-NEXT: and a3, a3, a1 +; RV64ZBB-NEXT: addi a2, a2, 819 ; RV64ZBB-NEXT: and a0, a0, a1 -; RV64ZBB-NEXT: li a1, 21 +; RV64ZBB-NEXT: and a1, a3, a1 ; RV64ZBB-NEXT: slli a0, a0, 4 -; RV64ZBB-NEXT: or a0, a3, a0 -; RV64ZBB-NEXT: srli a3, a0, 2 +; RV64ZBB-NEXT: slli a3, a2, 32 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: add a2, a2, a3 +; RV64ZBB-NEXT: srli a1, a0, 2 ; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: and a2, a3, a2 -; RV64ZBB-NEXT: li a3, 85 -; RV64ZBB-NEXT: slli a1, a1, 58 -; RV64ZBB-NEXT: slli a3, a3, 56 +; RV64ZBB-NEXT: and a1, a1, a2 ; RV64ZBB-NEXT: slli a0, a0, 2 -; RV64ZBB-NEXT: or a0, a2, a0 -; RV64ZBB-NEXT: srli a2, a0, 1 -; RV64ZBB-NEXT: and a0, a0, a3 -; RV64ZBB-NEXT: and a1, a2, a1 -; RV64ZBB-NEXT: slli a0, a0, 1 +; RV64ZBB-NEXT: li a2, 21 ; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: li a1, 85 +; RV64ZBB-NEXT: srli a3, a0, 1 +; RV64ZBB-NEXT: slli a1, a1, 56 +; RV64ZBB-NEXT: slli a2, a2, 58 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: and a2, a3, a2 +; RV64ZBB-NEXT: slli a0, a0, 1 +; RV64ZBB-NEXT: 
or a0, a2, a0 ; RV64ZBB-NEXT: srli a0, a0, 57 ; RV64ZBB-NEXT: ret ; @@ -440,20 +440,20 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind { ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: and a0, a0, a2 ; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: lui a2, 3 -; RV32I-NEXT: addi a2, a2, 819 ; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: lui a2, 3 ; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: srli a1, a0, 2 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: lui a2, 5 -; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: addi a1, a2, 819 +; RV32I-NEXT: srli a2, a0, 2 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: and a1, a2, a1 ; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: lui a2, 5 ; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: addi a1, a2, 1365 +; RV32I-NEXT: srli a2, a0, 1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: and a1, a2, a1 ; RV32I-NEXT: slli a0, a0, 1 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: ret @@ -469,20 +469,20 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind { ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: lui a2, 3 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: slli a0, a0, 4 +; RV64I-NEXT: lui a2, 3 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: srli a1, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: lui a2, 5 -; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: addi a1, a2, 819 +; RV64I-NEXT: srli a2, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: lui a2, 5 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: addi a1, a2, 1365 +; RV64I-NEXT: srli a2, a0, 1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: slli a0, a0, 1 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -493,22 +493,22 @@ define i16 
@test_bitreverse_i16(i16 %a) nounwind { ; RV32ZBB-NEXT: lui a1, 15 ; RV32ZBB-NEXT: srli a2, a0, 12 ; RV32ZBB-NEXT: addi a1, a1, 240 -; RV32ZBB-NEXT: and a1, a2, a1 -; RV32ZBB-NEXT: lui a2, 3 ; RV32ZBB-NEXT: srli a0, a0, 20 -; RV32ZBB-NEXT: addi a2, a2, 819 +; RV32ZBB-NEXT: and a1, a2, a1 ; RV32ZBB-NEXT: andi a0, a0, -241 ; RV32ZBB-NEXT: or a0, a0, a1 -; RV32ZBB-NEXT: srli a1, a0, 2 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: and a1, a1, a2 -; RV32ZBB-NEXT: lui a2, 5 -; RV32ZBB-NEXT: addi a2, a2, 1365 +; RV32ZBB-NEXT: lui a1, 3 +; RV32ZBB-NEXT: srli a2, a0, 2 +; RV32ZBB-NEXT: addi a1, a1, 819 +; RV32ZBB-NEXT: and a2, a2, a1 +; RV32ZBB-NEXT: and a0, a0, a1 ; RV32ZBB-NEXT: slli a0, a0, 2 -; RV32ZBB-NEXT: or a0, a1, a0 -; RV32ZBB-NEXT: srli a1, a0, 1 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: lui a1, 5 +; RV32ZBB-NEXT: or a0, a2, a0 +; RV32ZBB-NEXT: addi a1, a1, 1365 +; RV32ZBB-NEXT: srli a2, a0, 1 +; RV32ZBB-NEXT: and a0, a0, a1 +; RV32ZBB-NEXT: and a1, a2, a1 ; RV32ZBB-NEXT: slli a0, a0, 1 ; RV32ZBB-NEXT: or a0, a1, a0 ; RV32ZBB-NEXT: ret @@ -519,22 +519,22 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind { ; RV64ZBB-NEXT: lui a1, 15 ; RV64ZBB-NEXT: srli a2, a0, 44 ; RV64ZBB-NEXT: addi a1, a1, 240 -; RV64ZBB-NEXT: and a1, a2, a1 -; RV64ZBB-NEXT: lui a2, 3 ; RV64ZBB-NEXT: srli a0, a0, 52 -; RV64ZBB-NEXT: addi a2, a2, 819 +; RV64ZBB-NEXT: and a1, a2, a1 ; RV64ZBB-NEXT: andi a0, a0, -241 ; RV64ZBB-NEXT: or a0, a0, a1 -; RV64ZBB-NEXT: srli a1, a0, 2 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: and a1, a1, a2 -; RV64ZBB-NEXT: lui a2, 5 -; RV64ZBB-NEXT: addi a2, a2, 1365 +; RV64ZBB-NEXT: lui a1, 3 +; RV64ZBB-NEXT: srli a2, a0, 2 +; RV64ZBB-NEXT: addi a1, a1, 819 +; RV64ZBB-NEXT: and a2, a2, a1 +; RV64ZBB-NEXT: and a0, a0, a1 ; RV64ZBB-NEXT: slli a0, a0, 2 -; RV64ZBB-NEXT: or a0, a1, a0 -; RV64ZBB-NEXT: srli a1, a0, 1 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: lui a1, 5 +; RV64ZBB-NEXT: 
or a0, a2, a0 +; RV64ZBB-NEXT: addi a1, a1, 1365 +; RV64ZBB-NEXT: srli a2, a0, 1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: and a1, a2, a1 ; RV64ZBB-NEXT: slli a0, a0, 1 ; RV64ZBB-NEXT: or a0, a1, a0 ; RV64ZBB-NEXT: ret @@ -559,98 +559,98 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind { define i32 @test_bitreverse_i32(i32 %a) nounwind { ; RV32I-LABEL: test_bitreverse_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: srli a1, a0, 8 -; RV32I-NEXT: lui a2, 16 +; RV32I-NEXT: lui a1, 16 +; RV32I-NEXT: srli a2, a0, 8 +; RV32I-NEXT: addi a1, a1, -256 ; RV32I-NEXT: srli a3, a0, 24 -; RV32I-NEXT: addi a2, a2, -256 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: and a2, a0, a2 +; RV32I-NEXT: and a2, a2, a1 +; RV32I-NEXT: and a1, a0, a1 +; RV32I-NEXT: or a2, a2, a3 +; RV32I-NEXT: slli a1, a1, 8 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a1, a1, a3 -; RV32I-NEXT: lui a3, 61681 -; RV32I-NEXT: slli a2, a2, 8 -; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: lui a2, 209715 -; RV32I-NEXT: addi a3, a3, -241 ; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: and a0, a0, a3 -; RV32I-NEXT: and a1, a1, a3 -; RV32I-NEXT: lui a3, 349525 -; RV32I-NEXT: addi a2, a2, 819 -; RV32I-NEXT: addi a3, a3, 1365 +; RV32I-NEXT: lui a1, 61681 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: addi a1, a1, -241 +; RV32I-NEXT: srli a2, a0, 4 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: and a1, a2, a1 ; RV32I-NEXT: slli a0, a0, 4 ; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: srli a1, a0, 2 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: srli a2, a0, 2 +; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: and a2, a2, a1 +; RV32I-NEXT: and a0, a0, a1 ; RV32I-NEXT: slli a0, a0, 2 -; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a0, a0, a3 -; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: lui a1, 349525 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: addi a1, a1, 1365 +; RV32I-NEXT: srli a2, a0, 1 +; RV32I-NEXT: and a0, a0, a1 +; 
RV32I-NEXT: and a1, a2, a1 ; RV32I-NEXT: slli a0, a0, 1 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_bitreverse_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: srli a1, a0, 8 -; RV64I-NEXT: lui a2, 16 +; RV64I-NEXT: lui a1, 16 +; RV64I-NEXT: srli a2, a0, 8 +; RV64I-NEXT: addi a1, a1, -256 ; RV64I-NEXT: srliw a3, a0, 24 -; RV64I-NEXT: addi a2, a2, -256 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: and a2, a0, a2 +; RV64I-NEXT: and a2, a2, a1 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: or a2, a2, a3 +; RV64I-NEXT: slli a1, a1, 8 ; RV64I-NEXT: slliw a0, a0, 24 -; RV64I-NEXT: or a1, a1, a3 -; RV64I-NEXT: lui a3, 61681 -; RV64I-NEXT: slli a2, a2, 8 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: addi a3, a3, -241 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: and a0, a0, a3 -; RV64I-NEXT: and a1, a1, a3 -; RV64I-NEXT: lui a3, 349525 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: addi a3, a3, 1365 +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: srli a2, a0, 4 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: slliw a0, a0, 4 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: srli a1, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: srli a2, a0, 2 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: and a2, a2, a1 +; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: slliw a0, a0, 2 -; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: and a0, a0, a3 -; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: lui a1, 349525 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: addi a1, a1, 1365 +; RV64I-NEXT: srli a2, a0, 1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: slliw a0, a0, 1 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: test_bitreverse_i32: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: rev8 a0, a0 ; RV32ZBB-NEXT: lui a1, 61681 -; RV32ZBB-NEXT: srli 
a2, a0, 4 +; RV32ZBB-NEXT: rev8 a0, a0 ; RV32ZBB-NEXT: addi a1, a1, -241 -; RV32ZBB-NEXT: and a2, a2, a1 +; RV32ZBB-NEXT: srli a2, a0, 4 ; RV32ZBB-NEXT: and a0, a0, a1 -; RV32ZBB-NEXT: lui a1, 209715 -; RV32ZBB-NEXT: addi a1, a1, 819 +; RV32ZBB-NEXT: and a1, a2, a1 ; RV32ZBB-NEXT: slli a0, a0, 4 -; RV32ZBB-NEXT: or a0, a2, a0 +; RV32ZBB-NEXT: lui a2, 209715 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: addi a1, a2, 819 ; RV32ZBB-NEXT: srli a2, a0, 2 ; RV32ZBB-NEXT: and a0, a0, a1 ; RV32ZBB-NEXT: and a1, a2, a1 -; RV32ZBB-NEXT: lui a2, 349525 -; RV32ZBB-NEXT: addi a2, a2, 1365 ; RV32ZBB-NEXT: slli a0, a0, 2 +; RV32ZBB-NEXT: lui a2, 349525 ; RV32ZBB-NEXT: or a0, a1, a0 -; RV32ZBB-NEXT: srli a1, a0, 1 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: addi a1, a2, 1365 +; RV32ZBB-NEXT: srli a2, a0, 1 +; RV32ZBB-NEXT: and a0, a0, a1 +; RV32ZBB-NEXT: and a1, a2, a1 ; RV32ZBB-NEXT: slli a0, a0, 1 ; RV32ZBB-NEXT: or a0, a1, a0 ; RV32ZBB-NEXT: ret @@ -658,28 +658,28 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind { ; RV64ZBB-LABEL: test_bitreverse_i32: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: rev8 a0, a0 -; RV64ZBB-NEXT: lui a1, 61681 -; RV64ZBB-NEXT: srli a2, a0, 36 -; RV64ZBB-NEXT: addi a1, a1, -241 +; RV64ZBB-NEXT: lui a1, 986895 +; RV64ZBB-NEXT: srli a2, a0, 28 +; RV64ZBB-NEXT: addi a1, a1, 240 ; RV64ZBB-NEXT: and a1, a2, a1 -; RV64ZBB-NEXT: lui a2, 986895 -; RV64ZBB-NEXT: srli a0, a0, 28 -; RV64ZBB-NEXT: addi a2, a2, 240 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: lui a2, 209715 -; RV64ZBB-NEXT: addi a2, a2, 819 -; RV64ZBB-NEXT: sext.w a0, a0 -; RV64ZBB-NEXT: or a0, a1, a0 -; RV64ZBB-NEXT: srli a1, a0, 2 +; RV64ZBB-NEXT: lui a2, 61681 +; RV64ZBB-NEXT: srli a0, a0, 36 +; RV64ZBB-NEXT: addi a2, a2, -241 ; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: and a1, a1, a2 -; RV64ZBB-NEXT: lui a2, 349525 -; RV64ZBB-NEXT: addi a2, a2, 1365 +; RV64ZBB-NEXT: sext.w a1, a1 +; RV64ZBB-NEXT: or a0, a0, a1 +; RV64ZBB-NEXT: lui a1, 209715 +; 
RV64ZBB-NEXT: srli a2, a0, 2 +; RV64ZBB-NEXT: addi a1, a1, 819 +; RV64ZBB-NEXT: and a2, a2, a1 +; RV64ZBB-NEXT: and a0, a0, a1 ; RV64ZBB-NEXT: slliw a0, a0, 2 -; RV64ZBB-NEXT: or a0, a1, a0 -; RV64ZBB-NEXT: srli a1, a0, 1 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: lui a1, 349525 +; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: addi a1, a1, 1365 +; RV64ZBB-NEXT: srli a2, a0, 1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: and a1, a2, a1 ; RV64ZBB-NEXT: slliw a0, a0, 1 ; RV64ZBB-NEXT: or a0, a1, a0 ; RV64ZBB-NEXT: ret @@ -706,59 +706,59 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind { ; RV32I-NEXT: srli a2, a1, 8 ; RV32I-NEXT: lui a3, 16 ; RV32I-NEXT: srli a4, a1, 24 -; RV32I-NEXT: slli a5, a1, 24 -; RV32I-NEXT: lui a6, 61681 -; RV32I-NEXT: srli a7, a0, 8 ; RV32I-NEXT: addi a3, a3, -256 -; RV32I-NEXT: and a2, a2, a3 -; RV32I-NEXT: or a2, a2, a4 -; RV32I-NEXT: srli a4, a0, 24 -; RV32I-NEXT: and a7, a7, a3 -; RV32I-NEXT: or a4, a7, a4 -; RV32I-NEXT: lui a7, 209715 +; RV32I-NEXT: slli a5, a1, 24 ; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: and a2, a2, a3 ; RV32I-NEXT: slli a1, a1, 8 +; RV32I-NEXT: or a2, a2, a4 ; RV32I-NEXT: or a1, a5, a1 -; RV32I-NEXT: lui a5, 349525 +; RV32I-NEXT: lui a4, 61681 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: addi a2, a4, -241 +; RV32I-NEXT: srli a4, a1, 4 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: and a4, a4, a2 +; RV32I-NEXT: slli a1, a1, 4 +; RV32I-NEXT: lui a5, 209715 +; RV32I-NEXT: or a1, a4, a1 +; RV32I-NEXT: addi a4, a5, 819 +; RV32I-NEXT: srli a5, a1, 2 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: and a5, a5, a4 +; RV32I-NEXT: slli a1, a1, 2 +; RV32I-NEXT: or a1, a5, a1 +; RV32I-NEXT: srli a5, a0, 8 +; RV32I-NEXT: and a5, a5, a3 +; RV32I-NEXT: srli a6, a0, 24 +; RV32I-NEXT: srli a7, a1, 1 +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: lui a6, 349525 ; RV32I-NEXT: and a3, a0, a3 -; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: addi a6, a6, -241 -; RV32I-NEXT: addi a7, a7, 819 -; RV32I-NEXT: 
addi a5, a5, 1365 ; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: addi a6, a6, 1365 ; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: or a0, a0, a4 -; RV32I-NEXT: srli a2, a1, 4 -; RV32I-NEXT: and a1, a1, a6 -; RV32I-NEXT: srli a3, a0, 4 -; RV32I-NEXT: and a0, a0, a6 -; RV32I-NEXT: and a2, a2, a6 -; RV32I-NEXT: slli a1, a1, 4 -; RV32I-NEXT: and a3, a3, a6 +; RV32I-NEXT: and a3, a7, a6 +; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: srli a5, a0, 4 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: and a2, a5, a2 ; RV32I-NEXT: slli a0, a0, 4 -; RV32I-NEXT: or a1, a2, a1 -; RV32I-NEXT: or a0, a3, a0 -; RV32I-NEXT: srli a2, a1, 2 -; RV32I-NEXT: and a1, a1, a7 -; RV32I-NEXT: srli a3, a0, 2 -; RV32I-NEXT: and a0, a0, a7 -; RV32I-NEXT: and a2, a2, a7 -; RV32I-NEXT: slli a1, a1, 2 -; RV32I-NEXT: and a3, a3, a7 +; RV32I-NEXT: and a1, a1, a6 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: srli a2, a0, 2 +; RV32I-NEXT: and a0, a0, a4 +; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: slli a0, a0, 2 -; RV32I-NEXT: or a1, a2, a1 -; RV32I-NEXT: or a0, a3, a0 -; RV32I-NEXT: srli a2, a1, 1 -; RV32I-NEXT: and a1, a1, a5 -; RV32I-NEXT: srli a3, a0, 1 -; RV32I-NEXT: and a0, a0, a5 -; RV32I-NEXT: and a2, a2, a5 ; RV32I-NEXT: slli a1, a1, 1 -; RV32I-NEXT: and a3, a3, a5 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: srli a2, a0, 1 +; RV32I-NEXT: and a0, a0, a6 +; RV32I-NEXT: and a2, a2, a6 ; RV32I-NEXT: slli a4, a0, 1 -; RV32I-NEXT: or a0, a2, a1 -; RV32I-NEXT: or a1, a3, a4 +; RV32I-NEXT: or a0, a3, a1 +; RV32I-NEXT: or a1, a2, a4 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_bitreverse_i64: @@ -766,53 +766,53 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind { ; RV64I-NEXT: srli a1, a0, 40 ; RV64I-NEXT: lui a2, 16 ; RV64I-NEXT: srli a3, a0, 56 -; RV64I-NEXT: srli a4, a0, 24 -; RV64I-NEXT: lui a5, 4080 -; RV64I-NEXT: srli a6, a0, 8 -; RV64I-NEXT: srliw a7, a0, 24 -; RV64I-NEXT: lui t0, 61681 ; RV64I-NEXT: addi a2, a2, -256 +; RV64I-NEXT: srli a4, a0, 24 ; 
RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: or a1, a1, a3 -; RV64I-NEXT: lui a3, 209715 -; RV64I-NEXT: and a4, a4, a5 -; RV64I-NEXT: srliw a6, a6, 24 -; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a4 -; RV64I-NEXT: lui a6, 349525 -; RV64I-NEXT: and a5, a0, a5 -; RV64I-NEXT: slli a7, a7, 32 -; RV64I-NEXT: addi t0, t0, -241 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: addi a6, a6, 1365 +; RV64I-NEXT: srli a5, a0, 8 +; RV64I-NEXT: lui a6, 4080 +; RV64I-NEXT: srliw a5, a5, 24 +; RV64I-NEXT: and a4, a4, a6 ; RV64I-NEXT: slli a5, a5, 24 -; RV64I-NEXT: or a5, a5, a7 -; RV64I-NEXT: slli a7, t0, 32 -; RV64I-NEXT: add a7, t0, a7 -; RV64I-NEXT: slli t0, a3, 32 -; RV64I-NEXT: add a3, a3, t0 -; RV64I-NEXT: slli t0, a6, 32 -; RV64I-NEXT: add a6, a6, t0 +; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: or a4, a5, a4 ; RV64I-NEXT: or a1, a4, a1 +; RV64I-NEXT: and a3, a0, a6 +; RV64I-NEXT: srliw a4, a0, 24 +; RV64I-NEXT: slli a3, a3, 24 +; RV64I-NEXT: slli a4, a4, 32 +; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: and a2, a0, a2 -; RV64I-NEXT: slli a0, a0, 56 ; RV64I-NEXT: slli a2, a2, 40 +; RV64I-NEXT: slli a0, a0, 56 ; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: addi a2, a2, -241 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: and a0, a0, a7 -; RV64I-NEXT: and a1, a1, a7 +; RV64I-NEXT: slli a1, a2, 32 +; RV64I-NEXT: srli a3, a0, 4 +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: and a3, a3, a1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 209715 ; RV64I-NEXT: slli a0, a0, 4 -; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: srli a1, a0, 2 -; RV64I-NEXT: and a0, a0, a3 -; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: or a0, a3, a0 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: srli a3, a0, 2 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: and a3, a3, a1 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: addi a1, a2, 1365 ; 
RV64I-NEXT: slli a0, a0, 2 -; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: and a0, a0, a6 -; RV64I-NEXT: and a1, a1, a6 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: or a0, a3, a0 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: srli a2, a0, 1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: slli a0, a0, 1 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -822,71 +822,71 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind { ; RV32ZBB-NEXT: rev8 a1, a1 ; RV32ZBB-NEXT: lui a2, 61681 ; RV32ZBB-NEXT: lui a3, 209715 -; RV32ZBB-NEXT: rev8 a0, a0 -; RV32ZBB-NEXT: srli a4, a1, 4 ; RV32ZBB-NEXT: addi a2, a2, -241 -; RV32ZBB-NEXT: srli a5, a0, 4 -; RV32ZBB-NEXT: and a4, a4, a2 +; RV32ZBB-NEXT: srli a4, a1, 4 ; RV32ZBB-NEXT: and a1, a1, a2 -; RV32ZBB-NEXT: and a5, a5, a2 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: lui a2, 349525 -; RV32ZBB-NEXT: addi a3, a3, 819 -; RV32ZBB-NEXT: addi a2, a2, 1365 +; RV32ZBB-NEXT: and a4, a4, a2 ; RV32ZBB-NEXT: slli a1, a1, 4 -; RV32ZBB-NEXT: slli a0, a0, 4 +; RV32ZBB-NEXT: addi a3, a3, 819 ; RV32ZBB-NEXT: or a1, a4, a1 -; RV32ZBB-NEXT: or a0, a5, a0 ; RV32ZBB-NEXT: srli a4, a1, 2 ; RV32ZBB-NEXT: and a1, a1, a3 -; RV32ZBB-NEXT: srli a5, a0, 2 -; RV32ZBB-NEXT: and a0, a0, a3 ; RV32ZBB-NEXT: and a4, a4, a3 ; RV32ZBB-NEXT: slli a1, a1, 2 -; RV32ZBB-NEXT: and a3, a5, a3 -; RV32ZBB-NEXT: slli a0, a0, 2 +; RV32ZBB-NEXT: rev8 a0, a0 ; RV32ZBB-NEXT: or a1, a4, a1 -; RV32ZBB-NEXT: or a0, a3, a0 -; RV32ZBB-NEXT: srli a3, a1, 1 -; RV32ZBB-NEXT: and a1, a1, a2 -; RV32ZBB-NEXT: srli a4, a0, 1 +; RV32ZBB-NEXT: lui a4, 349525 +; RV32ZBB-NEXT: srli a5, a0, 4 +; RV32ZBB-NEXT: and a5, a5, a2 ; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: and a3, a3, a2 +; RV32ZBB-NEXT: srli a2, a1, 1 +; RV32ZBB-NEXT: addi a4, a4, 1365 +; RV32ZBB-NEXT: and a2, a2, a4 +; RV32ZBB-NEXT: slli a0, a0, 4 +; RV32ZBB-NEXT: and a1, a1, a4 +; RV32ZBB-NEXT: or a0, a5, a0 +; RV32ZBB-NEXT: srli a5, a0, 2 +; RV32ZBB-NEXT: and a0, a0, a3 +; 
RV32ZBB-NEXT: and a3, a5, a3 +; RV32ZBB-NEXT: slli a0, a0, 2 ; RV32ZBB-NEXT: slli a1, a1, 1 -; RV32ZBB-NEXT: and a2, a4, a2 +; RV32ZBB-NEXT: or a0, a3, a0 +; RV32ZBB-NEXT: srli a3, a0, 1 +; RV32ZBB-NEXT: and a0, a0, a4 +; RV32ZBB-NEXT: and a3, a3, a4 ; RV32ZBB-NEXT: slli a4, a0, 1 -; RV32ZBB-NEXT: or a0, a3, a1 -; RV32ZBB-NEXT: or a1, a2, a4 +; RV32ZBB-NEXT: or a0, a2, a1 +; RV32ZBB-NEXT: or a1, a3, a4 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: test_bitreverse_i64: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: rev8 a0, a0 ; RV64ZBB-NEXT: lui a1, 61681 -; RV64ZBB-NEXT: lui a2, 209715 -; RV64ZBB-NEXT: lui a3, 349525 +; RV64ZBB-NEXT: rev8 a0, a0 ; RV64ZBB-NEXT: addi a1, a1, -241 +; RV64ZBB-NEXT: lui a2, 209715 +; RV64ZBB-NEXT: slli a3, a1, 32 +; RV64ZBB-NEXT: add a1, a1, a3 +; RV64ZBB-NEXT: srli a3, a0, 4 ; RV64ZBB-NEXT: addi a2, a2, 819 -; RV64ZBB-NEXT: addi a3, a3, 1365 -; RV64ZBB-NEXT: slli a4, a1, 32 -; RV64ZBB-NEXT: add a1, a1, a4 -; RV64ZBB-NEXT: slli a4, a2, 32 -; RV64ZBB-NEXT: add a2, a2, a4 -; RV64ZBB-NEXT: slli a4, a3, 32 -; RV64ZBB-NEXT: add a3, a3, a4 -; RV64ZBB-NEXT: srli a4, a0, 4 -; RV64ZBB-NEXT: and a4, a4, a1 ; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: and a1, a3, a1 ; RV64ZBB-NEXT: slli a0, a0, 4 -; RV64ZBB-NEXT: or a0, a4, a0 +; RV64ZBB-NEXT: slli a3, a2, 32 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: add a2, a2, a3 ; RV64ZBB-NEXT: srli a1, a0, 2 ; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: lui a3, 349525 ; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: addi a2, a3, 1365 ; RV64ZBB-NEXT: slli a0, a0, 2 +; RV64ZBB-NEXT: slli a3, a2, 32 ; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: add a2, a2, a3 ; RV64ZBB-NEXT: srli a1, a0, 1 -; RV64ZBB-NEXT: and a0, a0, a3 -; RV64ZBB-NEXT: and a1, a1, a3 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: and a1, a1, a2 ; RV64ZBB-NEXT: slli a0, a0, 1 ; RV64ZBB-NEXT: or a0, a1, a0 ; RV64ZBB-NEXT: ret @@ -911,100 +911,100 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind { define i16 @test_bswap_bitreverse_i16(i16 %a) 
nounwind { ; RV32I-LABEL: test_bswap_bitreverse_i16: ; RV32I: # %bb.0: -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: lui a2, 1 -; RV32I-NEXT: addi a2, a2, -241 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: lui a2, 3 -; RV32I-NEXT: addi a2, a2, 819 +; RV32I-NEXT: lui a1, 1 +; RV32I-NEXT: addi a1, a1, -241 +; RV32I-NEXT: srli a2, a0, 4 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: and a1, a2, a1 ; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: lui a2, 3 ; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: srli a1, a0, 2 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: lui a2, 5 -; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: addi a1, a2, 819 +; RV32I-NEXT: srli a2, a0, 2 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: and a1, a2, a1 ; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: lui a2, 5 ; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: addi a1, a2, 1365 +; RV32I-NEXT: srli a2, a0, 1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: and a1, a2, a1 ; RV32I-NEXT: slli a0, a0, 1 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_bswap_bitreverse_i16: ; RV64I: # %bb.0: -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: lui a2, 1 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a2, 3 -; RV64I-NEXT: addi a2, a2, 819 +; RV64I-NEXT: lui a1, 1 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: srli a2, a0, 4 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: slli a0, a0, 4 +; RV64I-NEXT: lui a2, 3 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: srli a1, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: lui a2, 5 -; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: addi a1, a2, 819 +; RV64I-NEXT: srli a2, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: lui a2, 5 ; RV64I-NEXT: or a0, a1, a0 -; 
RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: addi a1, a2, 1365 +; RV64I-NEXT: srli a2, a0, 1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: slli a0, a0, 1 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: test_bswap_bitreverse_i16: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: srli a1, a0, 4 -; RV32ZBB-NEXT: lui a2, 1 -; RV32ZBB-NEXT: addi a2, a2, -241 -; RV32ZBB-NEXT: and a1, a1, a2 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: lui a2, 3 -; RV32ZBB-NEXT: addi a2, a2, 819 +; RV32ZBB-NEXT: lui a1, 1 +; RV32ZBB-NEXT: addi a1, a1, -241 +; RV32ZBB-NEXT: srli a2, a0, 4 +; RV32ZBB-NEXT: and a0, a0, a1 +; RV32ZBB-NEXT: and a1, a2, a1 ; RV32ZBB-NEXT: slli a0, a0, 4 +; RV32ZBB-NEXT: lui a2, 3 ; RV32ZBB-NEXT: or a0, a1, a0 -; RV32ZBB-NEXT: srli a1, a0, 2 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: and a1, a1, a2 -; RV32ZBB-NEXT: lui a2, 5 -; RV32ZBB-NEXT: addi a2, a2, 1365 +; RV32ZBB-NEXT: addi a1, a2, 819 +; RV32ZBB-NEXT: srli a2, a0, 2 +; RV32ZBB-NEXT: and a0, a0, a1 +; RV32ZBB-NEXT: and a1, a2, a1 ; RV32ZBB-NEXT: slli a0, a0, 2 +; RV32ZBB-NEXT: lui a2, 5 ; RV32ZBB-NEXT: or a0, a1, a0 -; RV32ZBB-NEXT: srli a1, a0, 1 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: addi a1, a2, 1365 +; RV32ZBB-NEXT: srli a2, a0, 1 +; RV32ZBB-NEXT: and a0, a0, a1 +; RV32ZBB-NEXT: and a1, a2, a1 ; RV32ZBB-NEXT: slli a0, a0, 1 ; RV32ZBB-NEXT: or a0, a1, a0 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: test_bswap_bitreverse_i16: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: srli a1, a0, 4 -; RV64ZBB-NEXT: lui a2, 1 -; RV64ZBB-NEXT: addi a2, a2, -241 -; RV64ZBB-NEXT: and a1, a1, a2 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: lui a2, 3 -; RV64ZBB-NEXT: addi a2, a2, 819 +; RV64ZBB-NEXT: lui a1, 1 +; RV64ZBB-NEXT: addi a1, a1, -241 +; RV64ZBB-NEXT: srli a2, a0, 4 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: and a1, a2, a1 ; RV64ZBB-NEXT: slli a0, a0, 4 +; RV64ZBB-NEXT: lui a2, 
3 ; RV64ZBB-NEXT: or a0, a1, a0 -; RV64ZBB-NEXT: srli a1, a0, 2 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: and a1, a1, a2 -; RV64ZBB-NEXT: lui a2, 5 -; RV64ZBB-NEXT: addi a2, a2, 1365 +; RV64ZBB-NEXT: addi a1, a2, 819 +; RV64ZBB-NEXT: srli a2, a0, 2 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: and a1, a2, a1 ; RV64ZBB-NEXT: slli a0, a0, 2 +; RV64ZBB-NEXT: lui a2, 5 ; RV64ZBB-NEXT: or a0, a1, a0 -; RV64ZBB-NEXT: srli a1, a0, 1 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: addi a1, a2, 1365 +; RV64ZBB-NEXT: srli a2, a0, 1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: and a1, a2, a1 ; RV64ZBB-NEXT: slli a0, a0, 1 ; RV64ZBB-NEXT: or a0, a1, a0 ; RV64ZBB-NEXT: ret @@ -1026,100 +1026,100 @@ define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind { define i32 @test_bswap_bitreverse_i32(i32 %a) nounwind { ; RV32I-LABEL: test_bswap_bitreverse_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: lui a2, 61681 -; RV32I-NEXT: addi a2, a2, -241 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: lui a2, 209715 -; RV32I-NEXT: addi a2, a2, 819 +; RV32I-NEXT: lui a1, 61681 +; RV32I-NEXT: addi a1, a1, -241 +; RV32I-NEXT: srli a2, a0, 4 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: and a1, a2, a1 ; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: lui a2, 209715 ; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: srli a1, a0, 2 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: addi a1, a2, 819 +; RV32I-NEXT: srli a2, a0, 2 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: and a1, a2, a1 ; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: lui a2, 349525 ; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: addi a1, a2, 1365 +; RV32I-NEXT: srli a2, a0, 1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: and a1, a2, a1 ; RV32I-NEXT: slli a0, a0, 1 ; RV32I-NEXT: or a0, a1, 
a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_bswap_bitreverse_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: lui a2, 61681 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: addi a2, a2, 819 +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: srli a2, a0, 4 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: slliw a0, a0, 4 +; RV64I-NEXT: lui a2, 209715 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: srli a1, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: lui a2, 349525 -; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: addi a1, a2, 819 +; RV64I-NEXT: srli a2, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: slliw a0, a0, 2 +; RV64I-NEXT: lui a2, 349525 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: addi a1, a2, 1365 +; RV64I-NEXT: srli a2, a0, 1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: slliw a0, a0, 1 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: test_bswap_bitreverse_i32: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: srli a1, a0, 4 -; RV32ZBB-NEXT: lui a2, 61681 -; RV32ZBB-NEXT: addi a2, a2, -241 -; RV32ZBB-NEXT: and a1, a1, a2 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: lui a2, 209715 -; RV32ZBB-NEXT: addi a2, a2, 819 +; RV32ZBB-NEXT: lui a1, 61681 +; RV32ZBB-NEXT: addi a1, a1, -241 +; RV32ZBB-NEXT: srli a2, a0, 4 +; RV32ZBB-NEXT: and a0, a0, a1 +; RV32ZBB-NEXT: and a1, a2, a1 ; RV32ZBB-NEXT: slli a0, a0, 4 +; RV32ZBB-NEXT: lui a2, 209715 ; RV32ZBB-NEXT: or a0, a1, a0 -; RV32ZBB-NEXT: srli a1, a0, 2 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: and a1, a1, a2 -; RV32ZBB-NEXT: lui a2, 349525 -; RV32ZBB-NEXT: addi a2, a2, 1365 +; RV32ZBB-NEXT: addi a1, a2, 819 +; RV32ZBB-NEXT: srli a2, a0, 2 +; RV32ZBB-NEXT: and a0, a0, a1 
+; RV32ZBB-NEXT: and a1, a2, a1 ; RV32ZBB-NEXT: slli a0, a0, 2 +; RV32ZBB-NEXT: lui a2, 349525 ; RV32ZBB-NEXT: or a0, a1, a0 -; RV32ZBB-NEXT: srli a1, a0, 1 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: addi a1, a2, 1365 +; RV32ZBB-NEXT: srli a2, a0, 1 +; RV32ZBB-NEXT: and a0, a0, a1 +; RV32ZBB-NEXT: and a1, a2, a1 ; RV32ZBB-NEXT: slli a0, a0, 1 ; RV32ZBB-NEXT: or a0, a1, a0 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: test_bswap_bitreverse_i32: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: srli a1, a0, 4 -; RV64ZBB-NEXT: lui a2, 61681 -; RV64ZBB-NEXT: addi a2, a2, -241 -; RV64ZBB-NEXT: and a1, a1, a2 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: lui a2, 209715 -; RV64ZBB-NEXT: addi a2, a2, 819 +; RV64ZBB-NEXT: lui a1, 61681 +; RV64ZBB-NEXT: addi a1, a1, -241 +; RV64ZBB-NEXT: srli a2, a0, 4 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: and a1, a2, a1 ; RV64ZBB-NEXT: slliw a0, a0, 4 +; RV64ZBB-NEXT: lui a2, 209715 ; RV64ZBB-NEXT: or a0, a1, a0 -; RV64ZBB-NEXT: srli a1, a0, 2 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: and a1, a1, a2 -; RV64ZBB-NEXT: lui a2, 349525 -; RV64ZBB-NEXT: addi a2, a2, 1365 +; RV64ZBB-NEXT: addi a1, a2, 819 +; RV64ZBB-NEXT: srli a2, a0, 2 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: and a1, a2, a1 ; RV64ZBB-NEXT: slliw a0, a0, 2 +; RV64ZBB-NEXT: lui a2, 349525 ; RV64ZBB-NEXT: or a0, a1, a0 -; RV64ZBB-NEXT: srli a1, a0, 1 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: addi a1, a2, 1365 +; RV64ZBB-NEXT: srli a2, a0, 1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: and a1, a2, a1 ; RV64ZBB-NEXT: slliw a0, a0, 1 ; RV64ZBB-NEXT: or a0, a1, a0 ; RV64ZBB-NEXT: ret @@ -1144,68 +1144,68 @@ define i64 @test_bswap_bitreverse_i64(i64 %a) nounwind { ; RV32I-NEXT: srli a2, a0, 4 ; RV32I-NEXT: lui a3, 61681 ; RV32I-NEXT: lui a4, 209715 -; RV32I-NEXT: srli a5, a1, 4 ; RV32I-NEXT: addi a3, a3, -241 -; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: lui a5, 349525 ; RV32I-NEXT: and a0, a0, a3 -; 
RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a1, a1, a3 -; RV32I-NEXT: lui a3, 349525 -; RV32I-NEXT: addi a4, a4, 819 -; RV32I-NEXT: addi a3, a3, 1365 +; RV32I-NEXT: and a2, a2, a3 ; RV32I-NEXT: slli a0, a0, 4 -; RV32I-NEXT: slli a1, a1, 4 +; RV32I-NEXT: addi a4, a4, 819 ; RV32I-NEXT: or a0, a2, a0 -; RV32I-NEXT: or a1, a5, a1 ; RV32I-NEXT: srli a2, a0, 2 ; RV32I-NEXT: and a0, a0, a4 -; RV32I-NEXT: srli a5, a1, 2 -; RV32I-NEXT: and a1, a1, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: slli a0, a0, 2 -; RV32I-NEXT: and a4, a5, a4 -; RV32I-NEXT: slli a1, a1, 2 ; RV32I-NEXT: or a0, a2, a0 -; RV32I-NEXT: or a1, a4, a1 -; RV32I-NEXT: srli a2, a0, 1 -; RV32I-NEXT: and a0, a0, a3 -; RV32I-NEXT: srli a4, a1, 1 -; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: srli a2, a1, 4 ; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: srli a3, a0, 1 +; RV32I-NEXT: addi a5, a5, 1365 +; RV32I-NEXT: and a3, a3, a5 +; RV32I-NEXT: slli a1, a1, 4 +; RV32I-NEXT: and a0, a0, a5 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: srli a2, a1, 2 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: and a2, a2, a4 +; RV32I-NEXT: slli a1, a1, 2 ; RV32I-NEXT: slli a0, a0, 1 -; RV32I-NEXT: and a3, a4, a3 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: srli a2, a1, 1 +; RV32I-NEXT: and a1, a1, a5 +; RV32I-NEXT: and a2, a2, a5 ; RV32I-NEXT: slli a1, a1, 1 -; RV32I-NEXT: or a0, a2, a0 -; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: or a1, a2, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_bswap_bitreverse_i64: ; RV64I: # %bb.0: ; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: lui a3, 349525 ; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a4, a1, 32 -; RV64I-NEXT: add a1, a1, a4 -; RV64I-NEXT: slli a4, a2, 32 -; RV64I-NEXT: add a2, a2, a4 -; RV64I-NEXT: slli a4, a3, 32 -; RV64I-NEXT: add a3, a3, a4 -; RV64I-NEXT: srli a4, a0, 4 -; RV64I-NEXT: and a4, a4, a1 +; RV64I-NEXT: srli a2, a0, 4 +; 
RV64I-NEXT: slli a3, a1, 32 +; RV64I-NEXT: add a1, a1, a3 +; RV64I-NEXT: lui a3, 209715 +; RV64I-NEXT: and a2, a2, a1 ; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 349525 +; RV64I-NEXT: addi a3, a3, 819 +; RV64I-NEXT: addi a1, a1, 1365 ; RV64I-NEXT: slli a0, a0, 4 -; RV64I-NEXT: or a0, a4, a0 -; RV64I-NEXT: srli a1, a0, 2 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: slli a2, a3, 32 +; RV64I-NEXT: add a2, a3, a2 +; RV64I-NEXT: srli a3, a0, 2 ; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: slli a0, a0, 2 -; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: and a0, a0, a3 -; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: slli a3, a1, 32 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: add a1, a1, a3 +; RV64I-NEXT: srli a2, a0, 1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: slli a0, a0, 1 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -1215,68 +1215,68 @@ define i64 @test_bswap_bitreverse_i64(i64 %a) nounwind { ; RV32ZBB-NEXT: srli a2, a0, 4 ; RV32ZBB-NEXT: lui a3, 61681 ; RV32ZBB-NEXT: lui a4, 209715 -; RV32ZBB-NEXT: srli a5, a1, 4 ; RV32ZBB-NEXT: addi a3, a3, -241 -; RV32ZBB-NEXT: and a2, a2, a3 +; RV32ZBB-NEXT: lui a5, 349525 ; RV32ZBB-NEXT: and a0, a0, a3 -; RV32ZBB-NEXT: and a5, a5, a3 -; RV32ZBB-NEXT: and a1, a1, a3 -; RV32ZBB-NEXT: lui a3, 349525 -; RV32ZBB-NEXT: addi a4, a4, 819 -; RV32ZBB-NEXT: addi a3, a3, 1365 +; RV32ZBB-NEXT: and a2, a2, a3 ; RV32ZBB-NEXT: slli a0, a0, 4 -; RV32ZBB-NEXT: slli a1, a1, 4 +; RV32ZBB-NEXT: addi a4, a4, 819 ; RV32ZBB-NEXT: or a0, a2, a0 -; RV32ZBB-NEXT: or a1, a5, a1 ; RV32ZBB-NEXT: srli a2, a0, 2 ; RV32ZBB-NEXT: and a0, a0, a4 -; RV32ZBB-NEXT: srli a5, a1, 2 -; RV32ZBB-NEXT: and a1, a1, a4 ; RV32ZBB-NEXT: and a2, a2, a4 ; RV32ZBB-NEXT: slli a0, a0, 2 -; RV32ZBB-NEXT: and a4, a5, a4 -; RV32ZBB-NEXT: slli a1, a1, 2 ; RV32ZBB-NEXT: or a0, a2, a0 -; RV32ZBB-NEXT: or a1, a4, a1 -; RV32ZBB-NEXT: srli a2, a0, 1 -; RV32ZBB-NEXT: and 
a0, a0, a3 -; RV32ZBB-NEXT: srli a4, a1, 1 -; RV32ZBB-NEXT: and a1, a1, a3 +; RV32ZBB-NEXT: srli a2, a1, 4 ; RV32ZBB-NEXT: and a2, a2, a3 +; RV32ZBB-NEXT: and a1, a1, a3 +; RV32ZBB-NEXT: srli a3, a0, 1 +; RV32ZBB-NEXT: addi a5, a5, 1365 +; RV32ZBB-NEXT: and a3, a3, a5 +; RV32ZBB-NEXT: slli a1, a1, 4 +; RV32ZBB-NEXT: and a0, a0, a5 +; RV32ZBB-NEXT: or a1, a2, a1 +; RV32ZBB-NEXT: srli a2, a1, 2 +; RV32ZBB-NEXT: and a1, a1, a4 +; RV32ZBB-NEXT: and a2, a2, a4 +; RV32ZBB-NEXT: slli a1, a1, 2 ; RV32ZBB-NEXT: slli a0, a0, 1 -; RV32ZBB-NEXT: and a3, a4, a3 +; RV32ZBB-NEXT: or a1, a2, a1 +; RV32ZBB-NEXT: srli a2, a1, 1 +; RV32ZBB-NEXT: and a1, a1, a5 +; RV32ZBB-NEXT: and a2, a2, a5 ; RV32ZBB-NEXT: slli a1, a1, 1 -; RV32ZBB-NEXT: or a0, a2, a0 -; RV32ZBB-NEXT: or a1, a3, a1 +; RV32ZBB-NEXT: or a0, a3, a0 +; RV32ZBB-NEXT: or a1, a2, a1 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: test_bswap_bitreverse_i64: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: lui a1, 61681 -; RV64ZBB-NEXT: lui a2, 209715 -; RV64ZBB-NEXT: lui a3, 349525 ; RV64ZBB-NEXT: addi a1, a1, -241 -; RV64ZBB-NEXT: addi a2, a2, 819 -; RV64ZBB-NEXT: addi a3, a3, 1365 -; RV64ZBB-NEXT: slli a4, a1, 32 -; RV64ZBB-NEXT: add a1, a1, a4 -; RV64ZBB-NEXT: slli a4, a2, 32 -; RV64ZBB-NEXT: add a2, a2, a4 -; RV64ZBB-NEXT: slli a4, a3, 32 -; RV64ZBB-NEXT: add a3, a3, a4 -; RV64ZBB-NEXT: srli a4, a0, 4 -; RV64ZBB-NEXT: and a4, a4, a1 +; RV64ZBB-NEXT: srli a2, a0, 4 +; RV64ZBB-NEXT: slli a3, a1, 32 +; RV64ZBB-NEXT: add a1, a1, a3 +; RV64ZBB-NEXT: lui a3, 209715 +; RV64ZBB-NEXT: and a2, a2, a1 ; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: lui a1, 349525 +; RV64ZBB-NEXT: addi a3, a3, 819 +; RV64ZBB-NEXT: addi a1, a1, 1365 ; RV64ZBB-NEXT: slli a0, a0, 4 -; RV64ZBB-NEXT: or a0, a4, a0 -; RV64ZBB-NEXT: srli a1, a0, 2 +; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: slli a2, a3, 32 +; RV64ZBB-NEXT: add a2, a3, a2 +; RV64ZBB-NEXT: srli a3, a0, 2 ; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a2, a3, a2 ; 
RV64ZBB-NEXT: slli a0, a0, 2 -; RV64ZBB-NEXT: or a0, a1, a0 -; RV64ZBB-NEXT: srli a1, a0, 1 -; RV64ZBB-NEXT: and a0, a0, a3 -; RV64ZBB-NEXT: and a1, a1, a3 +; RV64ZBB-NEXT: slli a3, a1, 32 +; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: add a1, a1, a3 +; RV64ZBB-NEXT: srli a2, a0, 1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: and a1, a2, a1 ; RV64ZBB-NEXT: slli a0, a0, 1 ; RV64ZBB-NEXT: or a0, a1, a0 ; RV64ZBB-NEXT: ret @@ -1299,100 +1299,100 @@ define i64 @test_bswap_bitreverse_i64(i64 %a) nounwind { define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind { ; RV32I-LABEL: test_bitreverse_bswap_i16: ; RV32I: # %bb.0: -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: lui a2, 1 -; RV32I-NEXT: addi a2, a2, -241 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: lui a2, 3 -; RV32I-NEXT: addi a2, a2, 819 +; RV32I-NEXT: lui a1, 1 +; RV32I-NEXT: addi a1, a1, -241 +; RV32I-NEXT: srli a2, a0, 4 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: and a1, a2, a1 ; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: lui a2, 3 ; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: srli a1, a0, 2 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: lui a2, 5 -; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: addi a1, a2, 819 +; RV32I-NEXT: srli a2, a0, 2 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: and a1, a2, a1 ; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: lui a2, 5 ; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: addi a1, a2, 1365 +; RV32I-NEXT: srli a2, a0, 1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: and a1, a2, a1 ; RV32I-NEXT: slli a0, a0, 1 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_bitreverse_bswap_i16: ; RV64I: # %bb.0: -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: lui a2, 1 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a2, 3 -; RV64I-NEXT: addi a2, a2, 819 +; RV64I-NEXT: lui a1, 1 +; 
RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: srli a2, a0, 4 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: slli a0, a0, 4 +; RV64I-NEXT: lui a2, 3 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: srli a1, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: lui a2, 5 -; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: addi a1, a2, 819 +; RV64I-NEXT: srli a2, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: lui a2, 5 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: addi a1, a2, 1365 +; RV64I-NEXT: srli a2, a0, 1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: slli a0, a0, 1 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: test_bitreverse_bswap_i16: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: srli a1, a0, 4 -; RV32ZBB-NEXT: lui a2, 1 -; RV32ZBB-NEXT: addi a2, a2, -241 -; RV32ZBB-NEXT: and a1, a1, a2 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: lui a2, 3 -; RV32ZBB-NEXT: addi a2, a2, 819 +; RV32ZBB-NEXT: lui a1, 1 +; RV32ZBB-NEXT: addi a1, a1, -241 +; RV32ZBB-NEXT: srli a2, a0, 4 +; RV32ZBB-NEXT: and a0, a0, a1 +; RV32ZBB-NEXT: and a1, a2, a1 ; RV32ZBB-NEXT: slli a0, a0, 4 +; RV32ZBB-NEXT: lui a2, 3 ; RV32ZBB-NEXT: or a0, a1, a0 -; RV32ZBB-NEXT: srli a1, a0, 2 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: and a1, a1, a2 -; RV32ZBB-NEXT: lui a2, 5 -; RV32ZBB-NEXT: addi a2, a2, 1365 +; RV32ZBB-NEXT: addi a1, a2, 819 +; RV32ZBB-NEXT: srli a2, a0, 2 +; RV32ZBB-NEXT: and a0, a0, a1 +; RV32ZBB-NEXT: and a1, a2, a1 ; RV32ZBB-NEXT: slli a0, a0, 2 +; RV32ZBB-NEXT: lui a2, 5 ; RV32ZBB-NEXT: or a0, a1, a0 -; RV32ZBB-NEXT: srli a1, a0, 1 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: addi a1, a2, 1365 +; RV32ZBB-NEXT: srli a2, a0, 1 +; RV32ZBB-NEXT: and a0, a0, a1 +; RV32ZBB-NEXT: and a1, a2, a1 ; RV32ZBB-NEXT: slli 
a0, a0, 1 ; RV32ZBB-NEXT: or a0, a1, a0 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: test_bitreverse_bswap_i16: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: srli a1, a0, 4 -; RV64ZBB-NEXT: lui a2, 1 -; RV64ZBB-NEXT: addi a2, a2, -241 -; RV64ZBB-NEXT: and a1, a1, a2 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: lui a2, 3 -; RV64ZBB-NEXT: addi a2, a2, 819 +; RV64ZBB-NEXT: lui a1, 1 +; RV64ZBB-NEXT: addi a1, a1, -241 +; RV64ZBB-NEXT: srli a2, a0, 4 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: and a1, a2, a1 ; RV64ZBB-NEXT: slli a0, a0, 4 +; RV64ZBB-NEXT: lui a2, 3 ; RV64ZBB-NEXT: or a0, a1, a0 -; RV64ZBB-NEXT: srli a1, a0, 2 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: and a1, a1, a2 -; RV64ZBB-NEXT: lui a2, 5 -; RV64ZBB-NEXT: addi a2, a2, 1365 +; RV64ZBB-NEXT: addi a1, a2, 819 +; RV64ZBB-NEXT: srli a2, a0, 2 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: and a1, a2, a1 ; RV64ZBB-NEXT: slli a0, a0, 2 +; RV64ZBB-NEXT: lui a2, 5 ; RV64ZBB-NEXT: or a0, a1, a0 -; RV64ZBB-NEXT: srli a1, a0, 1 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: addi a1, a2, 1365 +; RV64ZBB-NEXT: srli a2, a0, 1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: and a1, a2, a1 ; RV64ZBB-NEXT: slli a0, a0, 1 ; RV64ZBB-NEXT: or a0, a1, a0 ; RV64ZBB-NEXT: ret @@ -1414,100 +1414,100 @@ define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind { define i32 @test_bitreverse_bswap_i32(i32 %a) nounwind { ; RV32I-LABEL: test_bitreverse_bswap_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: lui a2, 61681 -; RV32I-NEXT: addi a2, a2, -241 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: lui a2, 209715 -; RV32I-NEXT: addi a2, a2, 819 +; RV32I-NEXT: lui a1, 61681 +; RV32I-NEXT: addi a1, a1, -241 +; RV32I-NEXT: srli a2, a0, 4 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: and a1, a2, a1 ; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: lui a2, 209715 ; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: srli a1, a0, 2 -; RV32I-NEXT: and a0, a0, a2 -; 
RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: addi a1, a2, 819 +; RV32I-NEXT: srli a2, a0, 2 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: and a1, a2, a1 ; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: lui a2, 349525 ; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: addi a1, a2, 1365 +; RV32I-NEXT: srli a2, a0, 1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: and a1, a2, a1 ; RV32I-NEXT: slli a0, a0, 1 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_bitreverse_bswap_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: lui a2, 61681 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: addi a2, a2, 819 +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: srli a2, a0, 4 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: slliw a0, a0, 4 +; RV64I-NEXT: lui a2, 209715 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: srli a1, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: lui a2, 349525 -; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: addi a1, a2, 819 +; RV64I-NEXT: srli a2, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: slliw a0, a0, 2 +; RV64I-NEXT: lui a2, 349525 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: addi a1, a2, 1365 +; RV64I-NEXT: srli a2, a0, 1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: slliw a0, a0, 1 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: test_bitreverse_bswap_i32: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: srli a1, a0, 4 -; RV32ZBB-NEXT: lui a2, 61681 -; RV32ZBB-NEXT: addi a2, a2, -241 -; RV32ZBB-NEXT: and a1, a1, a2 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: 
lui a2, 209715 -; RV32ZBB-NEXT: addi a2, a2, 819 +; RV32ZBB-NEXT: lui a1, 61681 +; RV32ZBB-NEXT: addi a1, a1, -241 +; RV32ZBB-NEXT: srli a2, a0, 4 +; RV32ZBB-NEXT: and a0, a0, a1 +; RV32ZBB-NEXT: and a1, a2, a1 ; RV32ZBB-NEXT: slli a0, a0, 4 +; RV32ZBB-NEXT: lui a2, 209715 ; RV32ZBB-NEXT: or a0, a1, a0 -; RV32ZBB-NEXT: srli a1, a0, 2 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: and a1, a1, a2 -; RV32ZBB-NEXT: lui a2, 349525 -; RV32ZBB-NEXT: addi a2, a2, 1365 +; RV32ZBB-NEXT: addi a1, a2, 819 +; RV32ZBB-NEXT: srli a2, a0, 2 +; RV32ZBB-NEXT: and a0, a0, a1 +; RV32ZBB-NEXT: and a1, a2, a1 ; RV32ZBB-NEXT: slli a0, a0, 2 +; RV32ZBB-NEXT: lui a2, 349525 ; RV32ZBB-NEXT: or a0, a1, a0 -; RV32ZBB-NEXT: srli a1, a0, 1 -; RV32ZBB-NEXT: and a0, a0, a2 -; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: addi a1, a2, 1365 +; RV32ZBB-NEXT: srli a2, a0, 1 +; RV32ZBB-NEXT: and a0, a0, a1 +; RV32ZBB-NEXT: and a1, a2, a1 ; RV32ZBB-NEXT: slli a0, a0, 1 ; RV32ZBB-NEXT: or a0, a1, a0 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: test_bitreverse_bswap_i32: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: srli a1, a0, 4 -; RV64ZBB-NEXT: lui a2, 61681 -; RV64ZBB-NEXT: addi a2, a2, -241 -; RV64ZBB-NEXT: and a1, a1, a2 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: lui a2, 209715 -; RV64ZBB-NEXT: addi a2, a2, 819 +; RV64ZBB-NEXT: lui a1, 61681 +; RV64ZBB-NEXT: addi a1, a1, -241 +; RV64ZBB-NEXT: srli a2, a0, 4 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: and a1, a2, a1 ; RV64ZBB-NEXT: slliw a0, a0, 4 +; RV64ZBB-NEXT: lui a2, 209715 ; RV64ZBB-NEXT: or a0, a1, a0 -; RV64ZBB-NEXT: srli a1, a0, 2 -; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: and a1, a1, a2 -; RV64ZBB-NEXT: lui a2, 349525 -; RV64ZBB-NEXT: addi a2, a2, 1365 +; RV64ZBB-NEXT: addi a1, a2, 819 +; RV64ZBB-NEXT: srli a2, a0, 2 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: and a1, a2, a1 ; RV64ZBB-NEXT: slliw a0, a0, 2 +; RV64ZBB-NEXT: lui a2, 349525 ; RV64ZBB-NEXT: or a0, a1, a0 -; RV64ZBB-NEXT: srli a1, a0, 1 -; RV64ZBB-NEXT: and a0, a0, a2 
-; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: addi a1, a2, 1365 +; RV64ZBB-NEXT: srli a2, a0, 1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: and a1, a2, a1 ; RV64ZBB-NEXT: slliw a0, a0, 1 ; RV64ZBB-NEXT: or a0, a1, a0 ; RV64ZBB-NEXT: ret @@ -1532,68 +1532,68 @@ define i64 @test_bitreverse_bswap_i64(i64 %a) nounwind { ; RV32I-NEXT: srli a2, a0, 4 ; RV32I-NEXT: lui a3, 61681 ; RV32I-NEXT: lui a4, 209715 -; RV32I-NEXT: srli a5, a1, 4 ; RV32I-NEXT: addi a3, a3, -241 -; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: lui a5, 349525 ; RV32I-NEXT: and a0, a0, a3 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a1, a1, a3 -; RV32I-NEXT: lui a3, 349525 -; RV32I-NEXT: addi a4, a4, 819 -; RV32I-NEXT: addi a3, a3, 1365 +; RV32I-NEXT: and a2, a2, a3 ; RV32I-NEXT: slli a0, a0, 4 -; RV32I-NEXT: slli a1, a1, 4 +; RV32I-NEXT: addi a4, a4, 819 ; RV32I-NEXT: or a0, a2, a0 -; RV32I-NEXT: or a1, a5, a1 ; RV32I-NEXT: srli a2, a0, 2 ; RV32I-NEXT: and a0, a0, a4 -; RV32I-NEXT: srli a5, a1, 2 -; RV32I-NEXT: and a1, a1, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: slli a0, a0, 2 -; RV32I-NEXT: and a4, a5, a4 -; RV32I-NEXT: slli a1, a1, 2 ; RV32I-NEXT: or a0, a2, a0 -; RV32I-NEXT: or a1, a4, a1 -; RV32I-NEXT: srli a2, a0, 1 -; RV32I-NEXT: and a0, a0, a3 -; RV32I-NEXT: srli a4, a1, 1 -; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: srli a2, a1, 4 ; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: srli a3, a0, 1 +; RV32I-NEXT: addi a5, a5, 1365 +; RV32I-NEXT: and a3, a3, a5 +; RV32I-NEXT: slli a1, a1, 4 +; RV32I-NEXT: and a0, a0, a5 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: srli a2, a1, 2 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: and a2, a2, a4 +; RV32I-NEXT: slli a1, a1, 2 ; RV32I-NEXT: slli a0, a0, 1 -; RV32I-NEXT: and a3, a4, a3 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: srli a2, a1, 1 +; RV32I-NEXT: and a1, a1, a5 +; RV32I-NEXT: and a2, a2, a5 ; RV32I-NEXT: slli a1, a1, 1 -; RV32I-NEXT: or a0, a2, a0 -; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: or a0, a3, a0 +; 
RV32I-NEXT: or a1, a2, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_bitreverse_bswap_i64: ; RV64I: # %bb.0: ; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: lui a3, 349525 ; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a4, a1, 32 -; RV64I-NEXT: add a1, a1, a4 -; RV64I-NEXT: slli a4, a2, 32 -; RV64I-NEXT: add a2, a2, a4 -; RV64I-NEXT: slli a4, a3, 32 -; RV64I-NEXT: add a3, a3, a4 -; RV64I-NEXT: srli a4, a0, 4 -; RV64I-NEXT: and a4, a4, a1 +; RV64I-NEXT: srli a2, a0, 4 +; RV64I-NEXT: slli a3, a1, 32 +; RV64I-NEXT: add a1, a1, a3 +; RV64I-NEXT: lui a3, 209715 +; RV64I-NEXT: and a2, a2, a1 ; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 349525 +; RV64I-NEXT: addi a3, a3, 819 +; RV64I-NEXT: addi a1, a1, 1365 ; RV64I-NEXT: slli a0, a0, 4 -; RV64I-NEXT: or a0, a4, a0 -; RV64I-NEXT: srli a1, a0, 2 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: slli a2, a3, 32 +; RV64I-NEXT: add a2, a3, a2 +; RV64I-NEXT: srli a3, a0, 2 ; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: slli a0, a0, 2 -; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: and a0, a0, a3 -; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: slli a3, a1, 32 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: add a1, a1, a3 +; RV64I-NEXT: srli a2, a0, 1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: and a1, a2, a1 ; RV64I-NEXT: slli a0, a0, 1 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -1603,68 +1603,68 @@ define i64 @test_bitreverse_bswap_i64(i64 %a) nounwind { ; RV32ZBB-NEXT: srli a2, a0, 4 ; RV32ZBB-NEXT: lui a3, 61681 ; RV32ZBB-NEXT: lui a4, 209715 -; RV32ZBB-NEXT: srli a5, a1, 4 ; RV32ZBB-NEXT: addi a3, a3, -241 -; RV32ZBB-NEXT: and a2, a2, a3 +; RV32ZBB-NEXT: lui a5, 349525 ; RV32ZBB-NEXT: and a0, a0, a3 -; RV32ZBB-NEXT: and a5, a5, a3 -; RV32ZBB-NEXT: and a1, a1, a3 -; RV32ZBB-NEXT: lui a3, 349525 -; RV32ZBB-NEXT: addi a4, a4, 819 -; RV32ZBB-NEXT: addi a3, 
a3, 1365 +; RV32ZBB-NEXT: and a2, a2, a3 ; RV32ZBB-NEXT: slli a0, a0, 4 -; RV32ZBB-NEXT: slli a1, a1, 4 +; RV32ZBB-NEXT: addi a4, a4, 819 ; RV32ZBB-NEXT: or a0, a2, a0 -; RV32ZBB-NEXT: or a1, a5, a1 ; RV32ZBB-NEXT: srli a2, a0, 2 ; RV32ZBB-NEXT: and a0, a0, a4 -; RV32ZBB-NEXT: srli a5, a1, 2 -; RV32ZBB-NEXT: and a1, a1, a4 ; RV32ZBB-NEXT: and a2, a2, a4 ; RV32ZBB-NEXT: slli a0, a0, 2 -; RV32ZBB-NEXT: and a4, a5, a4 -; RV32ZBB-NEXT: slli a1, a1, 2 ; RV32ZBB-NEXT: or a0, a2, a0 -; RV32ZBB-NEXT: or a1, a4, a1 -; RV32ZBB-NEXT: srli a2, a0, 1 -; RV32ZBB-NEXT: and a0, a0, a3 -; RV32ZBB-NEXT: srli a4, a1, 1 -; RV32ZBB-NEXT: and a1, a1, a3 +; RV32ZBB-NEXT: srli a2, a1, 4 ; RV32ZBB-NEXT: and a2, a2, a3 +; RV32ZBB-NEXT: and a1, a1, a3 +; RV32ZBB-NEXT: srli a3, a0, 1 +; RV32ZBB-NEXT: addi a5, a5, 1365 +; RV32ZBB-NEXT: and a3, a3, a5 +; RV32ZBB-NEXT: slli a1, a1, 4 +; RV32ZBB-NEXT: and a0, a0, a5 +; RV32ZBB-NEXT: or a1, a2, a1 +; RV32ZBB-NEXT: srli a2, a1, 2 +; RV32ZBB-NEXT: and a1, a1, a4 +; RV32ZBB-NEXT: and a2, a2, a4 +; RV32ZBB-NEXT: slli a1, a1, 2 ; RV32ZBB-NEXT: slli a0, a0, 1 -; RV32ZBB-NEXT: and a3, a4, a3 +; RV32ZBB-NEXT: or a1, a2, a1 +; RV32ZBB-NEXT: srli a2, a1, 1 +; RV32ZBB-NEXT: and a1, a1, a5 +; RV32ZBB-NEXT: and a2, a2, a5 ; RV32ZBB-NEXT: slli a1, a1, 1 -; RV32ZBB-NEXT: or a0, a2, a0 -; RV32ZBB-NEXT: or a1, a3, a1 +; RV32ZBB-NEXT: or a0, a3, a0 +; RV32ZBB-NEXT: or a1, a2, a1 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: test_bitreverse_bswap_i64: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: lui a1, 61681 -; RV64ZBB-NEXT: lui a2, 209715 -; RV64ZBB-NEXT: lui a3, 349525 ; RV64ZBB-NEXT: addi a1, a1, -241 -; RV64ZBB-NEXT: addi a2, a2, 819 -; RV64ZBB-NEXT: addi a3, a3, 1365 -; RV64ZBB-NEXT: slli a4, a1, 32 -; RV64ZBB-NEXT: add a1, a1, a4 -; RV64ZBB-NEXT: slli a4, a2, 32 -; RV64ZBB-NEXT: add a2, a2, a4 -; RV64ZBB-NEXT: slli a4, a3, 32 -; RV64ZBB-NEXT: add a3, a3, a4 -; RV64ZBB-NEXT: srli a4, a0, 4 -; RV64ZBB-NEXT: and a4, a4, a1 +; RV64ZBB-NEXT: srli a2, a0, 4 +; RV64ZBB-NEXT: slli 
a3, a1, 32 +; RV64ZBB-NEXT: add a1, a1, a3 +; RV64ZBB-NEXT: lui a3, 209715 +; RV64ZBB-NEXT: and a2, a2, a1 ; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: lui a1, 349525 +; RV64ZBB-NEXT: addi a3, a3, 819 +; RV64ZBB-NEXT: addi a1, a1, 1365 ; RV64ZBB-NEXT: slli a0, a0, 4 -; RV64ZBB-NEXT: or a0, a4, a0 -; RV64ZBB-NEXT: srli a1, a0, 2 +; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: slli a2, a3, 32 +; RV64ZBB-NEXT: add a2, a3, a2 +; RV64ZBB-NEXT: srli a3, a0, 2 ; RV64ZBB-NEXT: and a0, a0, a2 -; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a2, a3, a2 ; RV64ZBB-NEXT: slli a0, a0, 2 -; RV64ZBB-NEXT: or a0, a1, a0 -; RV64ZBB-NEXT: srli a1, a0, 1 -; RV64ZBB-NEXT: and a0, a0, a3 -; RV64ZBB-NEXT: and a1, a1, a3 +; RV64ZBB-NEXT: slli a3, a1, 32 +; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: add a1, a1, a3 +; RV64ZBB-NEXT: srli a2, a0, 1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: and a1, a2, a1 ; RV64ZBB-NEXT: slli a0, a0, 1 ; RV64ZBB-NEXT: or a0, a1, a0 ; RV64ZBB-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll index 2999a7e4981bc..fe1e57fd1c631 100644 --- a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll @@ -49,30 +49,30 @@ define void @callee() nounwind { ; ILP32-NEXT: flw ft9, 68(a0) ; ILP32-NEXT: flw ft10, 72(a0) ; ILP32-NEXT: flw ft11, 76(a0) -; ILP32-NEXT: flw fs0, 80(a0) -; ILP32-NEXT: flw fs1, 84(a0) -; ILP32-NEXT: flw fs2, 88(a0) -; ILP32-NEXT: flw fs3, 92(a0) -; ILP32-NEXT: flw fs4, 112(a0) -; ILP32-NEXT: flw fs5, 116(a0) -; ILP32-NEXT: flw fs6, 120(a0) -; ILP32-NEXT: flw fs7, 124(a0) -; ILP32-NEXT: flw fs8, 96(a0) -; ILP32-NEXT: flw fs9, 100(a0) -; ILP32-NEXT: flw fs10, 104(a0) -; ILP32-NEXT: flw fs11, 108(a0) -; ILP32-NEXT: fsw fs7, 124(a0) -; ILP32-NEXT: fsw fs6, 120(a0) -; ILP32-NEXT: fsw fs5, 116(a0) -; ILP32-NEXT: fsw fs4, 112(a0) -; ILP32-NEXT: fsw fs11, 108(a0) -; ILP32-NEXT: fsw fs10, 104(a0) -; ILP32-NEXT: fsw fs9, 100(a0) -; 
ILP32-NEXT: fsw fs8, 96(a0) -; ILP32-NEXT: fsw fs3, 92(a0) -; ILP32-NEXT: fsw fs2, 88(a0) -; ILP32-NEXT: fsw fs1, 84(a0) -; ILP32-NEXT: fsw fs0, 80(a0) +; ILP32-NEXT: flw fs0, 112(a0) +; ILP32-NEXT: flw fs1, 116(a0) +; ILP32-NEXT: flw fs2, 120(a0) +; ILP32-NEXT: flw fs3, 124(a0) +; ILP32-NEXT: flw fs4, 96(a0) +; ILP32-NEXT: flw fs5, 100(a0) +; ILP32-NEXT: flw fs6, 104(a0) +; ILP32-NEXT: flw fs7, 108(a0) +; ILP32-NEXT: flw fs8, 80(a0) +; ILP32-NEXT: flw fs9, 84(a0) +; ILP32-NEXT: flw fs10, 88(a0) +; ILP32-NEXT: flw fs11, 92(a0) +; ILP32-NEXT: fsw fs3, 124(a0) +; ILP32-NEXT: fsw fs2, 120(a0) +; ILP32-NEXT: fsw fs1, 116(a0) +; ILP32-NEXT: fsw fs0, 112(a0) +; ILP32-NEXT: fsw fs7, 108(a0) +; ILP32-NEXT: fsw fs6, 104(a0) +; ILP32-NEXT: fsw fs5, 100(a0) +; ILP32-NEXT: fsw fs4, 96(a0) +; ILP32-NEXT: fsw fs11, 92(a0) +; ILP32-NEXT: fsw fs10, 88(a0) +; ILP32-NEXT: fsw fs9, 84(a0) +; ILP32-NEXT: fsw fs8, 80(a0) ; ILP32-NEXT: fsw ft11, 76(a0) ; ILP32-NEXT: fsw ft10, 72(a0) ; ILP32-NEXT: fsw ft9, 68(a0) @@ -119,30 +119,30 @@ define void @callee() nounwind { ; ILP32E-NEXT: flw ft9, 68(a0) ; ILP32E-NEXT: flw ft10, 72(a0) ; ILP32E-NEXT: flw ft11, 76(a0) -; ILP32E-NEXT: flw fs0, 80(a0) -; ILP32E-NEXT: flw fs1, 84(a0) -; ILP32E-NEXT: flw fs2, 88(a0) -; ILP32E-NEXT: flw fs3, 92(a0) -; ILP32E-NEXT: flw fs4, 112(a0) -; ILP32E-NEXT: flw fs5, 116(a0) -; ILP32E-NEXT: flw fs6, 120(a0) -; ILP32E-NEXT: flw fs7, 124(a0) -; ILP32E-NEXT: flw fs8, 96(a0) -; ILP32E-NEXT: flw fs9, 100(a0) -; ILP32E-NEXT: flw fs10, 104(a0) -; ILP32E-NEXT: flw fs11, 108(a0) -; ILP32E-NEXT: fsw fs7, 124(a0) -; ILP32E-NEXT: fsw fs6, 120(a0) -; ILP32E-NEXT: fsw fs5, 116(a0) -; ILP32E-NEXT: fsw fs4, 112(a0) -; ILP32E-NEXT: fsw fs11, 108(a0) -; ILP32E-NEXT: fsw fs10, 104(a0) -; ILP32E-NEXT: fsw fs9, 100(a0) -; ILP32E-NEXT: fsw fs8, 96(a0) -; ILP32E-NEXT: fsw fs3, 92(a0) -; ILP32E-NEXT: fsw fs2, 88(a0) -; ILP32E-NEXT: fsw fs1, 84(a0) -; ILP32E-NEXT: fsw fs0, 80(a0) +; ILP32E-NEXT: flw fs0, 112(a0) +; ILP32E-NEXT: flw fs1, 
116(a0) +; ILP32E-NEXT: flw fs2, 120(a0) +; ILP32E-NEXT: flw fs3, 124(a0) +; ILP32E-NEXT: flw fs4, 96(a0) +; ILP32E-NEXT: flw fs5, 100(a0) +; ILP32E-NEXT: flw fs6, 104(a0) +; ILP32E-NEXT: flw fs7, 108(a0) +; ILP32E-NEXT: flw fs8, 80(a0) +; ILP32E-NEXT: flw fs9, 84(a0) +; ILP32E-NEXT: flw fs10, 88(a0) +; ILP32E-NEXT: flw fs11, 92(a0) +; ILP32E-NEXT: fsw fs3, 124(a0) +; ILP32E-NEXT: fsw fs2, 120(a0) +; ILP32E-NEXT: fsw fs1, 116(a0) +; ILP32E-NEXT: fsw fs0, 112(a0) +; ILP32E-NEXT: fsw fs7, 108(a0) +; ILP32E-NEXT: fsw fs6, 104(a0) +; ILP32E-NEXT: fsw fs5, 100(a0) +; ILP32E-NEXT: fsw fs4, 96(a0) +; ILP32E-NEXT: fsw fs11, 92(a0) +; ILP32E-NEXT: fsw fs10, 88(a0) +; ILP32E-NEXT: fsw fs9, 84(a0) +; ILP32E-NEXT: fsw fs8, 80(a0) ; ILP32E-NEXT: fsw ft11, 76(a0) ; ILP32E-NEXT: fsw ft10, 72(a0) ; ILP32E-NEXT: fsw ft9, 68(a0) @@ -189,30 +189,30 @@ define void @callee() nounwind { ; LP64-NEXT: flw ft9, 68(a0) ; LP64-NEXT: flw ft10, 72(a0) ; LP64-NEXT: flw ft11, 76(a0) -; LP64-NEXT: flw fs0, 80(a0) -; LP64-NEXT: flw fs1, 84(a0) -; LP64-NEXT: flw fs2, 88(a0) -; LP64-NEXT: flw fs3, 92(a0) -; LP64-NEXT: flw fs4, 112(a0) -; LP64-NEXT: flw fs5, 116(a0) -; LP64-NEXT: flw fs6, 120(a0) -; LP64-NEXT: flw fs7, 124(a0) -; LP64-NEXT: flw fs8, 96(a0) -; LP64-NEXT: flw fs9, 100(a0) -; LP64-NEXT: flw fs10, 104(a0) -; LP64-NEXT: flw fs11, 108(a0) -; LP64-NEXT: fsw fs7, 124(a0) -; LP64-NEXT: fsw fs6, 120(a0) -; LP64-NEXT: fsw fs5, 116(a0) -; LP64-NEXT: fsw fs4, 112(a0) -; LP64-NEXT: fsw fs11, 108(a0) -; LP64-NEXT: fsw fs10, 104(a0) -; LP64-NEXT: fsw fs9, 100(a0) -; LP64-NEXT: fsw fs8, 96(a0) -; LP64-NEXT: fsw fs3, 92(a0) -; LP64-NEXT: fsw fs2, 88(a0) -; LP64-NEXT: fsw fs1, 84(a0) -; LP64-NEXT: fsw fs0, 80(a0) +; LP64-NEXT: flw fs0, 112(a0) +; LP64-NEXT: flw fs1, 116(a0) +; LP64-NEXT: flw fs2, 120(a0) +; LP64-NEXT: flw fs3, 124(a0) +; LP64-NEXT: flw fs4, 96(a0) +; LP64-NEXT: flw fs5, 100(a0) +; LP64-NEXT: flw fs6, 104(a0) +; LP64-NEXT: flw fs7, 108(a0) +; LP64-NEXT: flw fs8, 80(a0) +; LP64-NEXT: flw 
fs9, 84(a0) +; LP64-NEXT: flw fs10, 88(a0) +; LP64-NEXT: flw fs11, 92(a0) +; LP64-NEXT: fsw fs3, 124(a0) +; LP64-NEXT: fsw fs2, 120(a0) +; LP64-NEXT: fsw fs1, 116(a0) +; LP64-NEXT: fsw fs0, 112(a0) +; LP64-NEXT: fsw fs7, 108(a0) +; LP64-NEXT: fsw fs6, 104(a0) +; LP64-NEXT: fsw fs5, 100(a0) +; LP64-NEXT: fsw fs4, 96(a0) +; LP64-NEXT: fsw fs11, 92(a0) +; LP64-NEXT: fsw fs10, 88(a0) +; LP64-NEXT: fsw fs9, 84(a0) +; LP64-NEXT: fsw fs8, 80(a0) ; LP64-NEXT: fsw ft11, 76(a0) ; LP64-NEXT: fsw ft10, 72(a0) ; LP64-NEXT: fsw ft9, 68(a0) @@ -259,30 +259,30 @@ define void @callee() nounwind { ; LP64E-NEXT: flw ft9, 68(a0) ; LP64E-NEXT: flw ft10, 72(a0) ; LP64E-NEXT: flw ft11, 76(a0) -; LP64E-NEXT: flw fs0, 80(a0) -; LP64E-NEXT: flw fs1, 84(a0) -; LP64E-NEXT: flw fs2, 88(a0) -; LP64E-NEXT: flw fs3, 92(a0) -; LP64E-NEXT: flw fs4, 112(a0) -; LP64E-NEXT: flw fs5, 116(a0) -; LP64E-NEXT: flw fs6, 120(a0) -; LP64E-NEXT: flw fs7, 124(a0) -; LP64E-NEXT: flw fs8, 96(a0) -; LP64E-NEXT: flw fs9, 100(a0) -; LP64E-NEXT: flw fs10, 104(a0) -; LP64E-NEXT: flw fs11, 108(a0) -; LP64E-NEXT: fsw fs7, 124(a0) -; LP64E-NEXT: fsw fs6, 120(a0) -; LP64E-NEXT: fsw fs5, 116(a0) -; LP64E-NEXT: fsw fs4, 112(a0) -; LP64E-NEXT: fsw fs11, 108(a0) -; LP64E-NEXT: fsw fs10, 104(a0) -; LP64E-NEXT: fsw fs9, 100(a0) -; LP64E-NEXT: fsw fs8, 96(a0) -; LP64E-NEXT: fsw fs3, 92(a0) -; LP64E-NEXT: fsw fs2, 88(a0) -; LP64E-NEXT: fsw fs1, 84(a0) -; LP64E-NEXT: fsw fs0, 80(a0) +; LP64E-NEXT: flw fs0, 112(a0) +; LP64E-NEXT: flw fs1, 116(a0) +; LP64E-NEXT: flw fs2, 120(a0) +; LP64E-NEXT: flw fs3, 124(a0) +; LP64E-NEXT: flw fs4, 96(a0) +; LP64E-NEXT: flw fs5, 100(a0) +; LP64E-NEXT: flw fs6, 104(a0) +; LP64E-NEXT: flw fs7, 108(a0) +; LP64E-NEXT: flw fs8, 80(a0) +; LP64E-NEXT: flw fs9, 84(a0) +; LP64E-NEXT: flw fs10, 88(a0) +; LP64E-NEXT: flw fs11, 92(a0) +; LP64E-NEXT: fsw fs3, 124(a0) +; LP64E-NEXT: fsw fs2, 120(a0) +; LP64E-NEXT: fsw fs1, 116(a0) +; LP64E-NEXT: fsw fs0, 112(a0) +; LP64E-NEXT: fsw fs7, 108(a0) +; LP64E-NEXT: 
fsw fs6, 104(a0) +; LP64E-NEXT: fsw fs5, 100(a0) +; LP64E-NEXT: fsw fs4, 96(a0) +; LP64E-NEXT: fsw fs11, 92(a0) +; LP64E-NEXT: fsw fs10, 88(a0) +; LP64E-NEXT: fsw fs9, 84(a0) +; LP64E-NEXT: fsw fs8, 80(a0) ; LP64E-NEXT: fsw ft11, 76(a0) ; LP64E-NEXT: fsw ft10, 72(a0) ; LP64E-NEXT: fsw ft9, 68(a0) @@ -342,30 +342,30 @@ define void @callee() nounwind { ; ILP32F-NEXT: flw ft9, 68(a0) ; ILP32F-NEXT: flw ft10, 72(a0) ; ILP32F-NEXT: flw ft11, 76(a0) -; ILP32F-NEXT: flw fs0, 80(a0) -; ILP32F-NEXT: flw fs1, 84(a0) -; ILP32F-NEXT: flw fs2, 88(a0) -; ILP32F-NEXT: flw fs3, 92(a0) -; ILP32F-NEXT: flw fs4, 112(a0) -; ILP32F-NEXT: flw fs5, 116(a0) -; ILP32F-NEXT: flw fs6, 120(a0) -; ILP32F-NEXT: flw fs7, 124(a0) -; ILP32F-NEXT: flw fs8, 96(a0) -; ILP32F-NEXT: flw fs9, 100(a0) -; ILP32F-NEXT: flw fs10, 104(a0) -; ILP32F-NEXT: flw fs11, 108(a0) -; ILP32F-NEXT: fsw fs7, 124(a0) -; ILP32F-NEXT: fsw fs6, 120(a0) -; ILP32F-NEXT: fsw fs5, 116(a0) -; ILP32F-NEXT: fsw fs4, 112(a0) -; ILP32F-NEXT: fsw fs11, 108(a0) -; ILP32F-NEXT: fsw fs10, 104(a0) -; ILP32F-NEXT: fsw fs9, 100(a0) -; ILP32F-NEXT: fsw fs8, 96(a0) -; ILP32F-NEXT: fsw fs3, 92(a0) -; ILP32F-NEXT: fsw fs2, 88(a0) -; ILP32F-NEXT: fsw fs1, 84(a0) -; ILP32F-NEXT: fsw fs0, 80(a0) +; ILP32F-NEXT: flw fs0, 112(a0) +; ILP32F-NEXT: flw fs1, 116(a0) +; ILP32F-NEXT: flw fs2, 120(a0) +; ILP32F-NEXT: flw fs3, 124(a0) +; ILP32F-NEXT: flw fs4, 96(a0) +; ILP32F-NEXT: flw fs5, 100(a0) +; ILP32F-NEXT: flw fs6, 104(a0) +; ILP32F-NEXT: flw fs7, 108(a0) +; ILP32F-NEXT: flw fs8, 80(a0) +; ILP32F-NEXT: flw fs9, 84(a0) +; ILP32F-NEXT: flw fs10, 88(a0) +; ILP32F-NEXT: flw fs11, 92(a0) +; ILP32F-NEXT: fsw fs3, 124(a0) +; ILP32F-NEXT: fsw fs2, 120(a0) +; ILP32F-NEXT: fsw fs1, 116(a0) +; ILP32F-NEXT: fsw fs0, 112(a0) +; ILP32F-NEXT: fsw fs7, 108(a0) +; ILP32F-NEXT: fsw fs6, 104(a0) +; ILP32F-NEXT: fsw fs5, 100(a0) +; ILP32F-NEXT: fsw fs4, 96(a0) +; ILP32F-NEXT: fsw fs11, 92(a0) +; ILP32F-NEXT: fsw fs10, 88(a0) +; ILP32F-NEXT: fsw fs9, 84(a0) +; 
ILP32F-NEXT: fsw fs8, 80(a0) ; ILP32F-NEXT: fsw ft11, 76(a0) ; ILP32F-NEXT: fsw ft10, 72(a0) ; ILP32F-NEXT: fsw ft9, 68(a0) @@ -438,30 +438,30 @@ define void @callee() nounwind { ; LP64F-NEXT: flw ft9, 68(a0) ; LP64F-NEXT: flw ft10, 72(a0) ; LP64F-NEXT: flw ft11, 76(a0) -; LP64F-NEXT: flw fs0, 80(a0) -; LP64F-NEXT: flw fs1, 84(a0) -; LP64F-NEXT: flw fs2, 88(a0) -; LP64F-NEXT: flw fs3, 92(a0) -; LP64F-NEXT: flw fs4, 112(a0) -; LP64F-NEXT: flw fs5, 116(a0) -; LP64F-NEXT: flw fs6, 120(a0) -; LP64F-NEXT: flw fs7, 124(a0) -; LP64F-NEXT: flw fs8, 96(a0) -; LP64F-NEXT: flw fs9, 100(a0) -; LP64F-NEXT: flw fs10, 104(a0) -; LP64F-NEXT: flw fs11, 108(a0) -; LP64F-NEXT: fsw fs7, 124(a0) -; LP64F-NEXT: fsw fs6, 120(a0) -; LP64F-NEXT: fsw fs5, 116(a0) -; LP64F-NEXT: fsw fs4, 112(a0) -; LP64F-NEXT: fsw fs11, 108(a0) -; LP64F-NEXT: fsw fs10, 104(a0) -; LP64F-NEXT: fsw fs9, 100(a0) -; LP64F-NEXT: fsw fs8, 96(a0) -; LP64F-NEXT: fsw fs3, 92(a0) -; LP64F-NEXT: fsw fs2, 88(a0) -; LP64F-NEXT: fsw fs1, 84(a0) -; LP64F-NEXT: fsw fs0, 80(a0) +; LP64F-NEXT: flw fs0, 112(a0) +; LP64F-NEXT: flw fs1, 116(a0) +; LP64F-NEXT: flw fs2, 120(a0) +; LP64F-NEXT: flw fs3, 124(a0) +; LP64F-NEXT: flw fs4, 96(a0) +; LP64F-NEXT: flw fs5, 100(a0) +; LP64F-NEXT: flw fs6, 104(a0) +; LP64F-NEXT: flw fs7, 108(a0) +; LP64F-NEXT: flw fs8, 80(a0) +; LP64F-NEXT: flw fs9, 84(a0) +; LP64F-NEXT: flw fs10, 88(a0) +; LP64F-NEXT: flw fs11, 92(a0) +; LP64F-NEXT: fsw fs3, 124(a0) +; LP64F-NEXT: fsw fs2, 120(a0) +; LP64F-NEXT: fsw fs1, 116(a0) +; LP64F-NEXT: fsw fs0, 112(a0) +; LP64F-NEXT: fsw fs7, 108(a0) +; LP64F-NEXT: fsw fs6, 104(a0) +; LP64F-NEXT: fsw fs5, 100(a0) +; LP64F-NEXT: fsw fs4, 96(a0) +; LP64F-NEXT: fsw fs11, 92(a0) +; LP64F-NEXT: fsw fs10, 88(a0) +; LP64F-NEXT: fsw fs9, 84(a0) +; LP64F-NEXT: fsw fs8, 80(a0) ; LP64F-NEXT: fsw ft11, 76(a0) ; LP64F-NEXT: fsw ft10, 72(a0) ; LP64F-NEXT: fsw ft9, 68(a0) @@ -534,30 +534,30 @@ define void @callee() nounwind { ; ILP32D-NEXT: flw ft9, 68(a0) ; ILP32D-NEXT: flw ft10, 
72(a0) ; ILP32D-NEXT: flw ft11, 76(a0) -; ILP32D-NEXT: flw fs0, 80(a0) -; ILP32D-NEXT: flw fs1, 84(a0) -; ILP32D-NEXT: flw fs2, 88(a0) -; ILP32D-NEXT: flw fs3, 92(a0) -; ILP32D-NEXT: flw fs4, 112(a0) -; ILP32D-NEXT: flw fs5, 116(a0) -; ILP32D-NEXT: flw fs6, 120(a0) -; ILP32D-NEXT: flw fs7, 124(a0) -; ILP32D-NEXT: flw fs8, 96(a0) -; ILP32D-NEXT: flw fs9, 100(a0) -; ILP32D-NEXT: flw fs10, 104(a0) -; ILP32D-NEXT: flw fs11, 108(a0) -; ILP32D-NEXT: fsw fs7, 124(a0) -; ILP32D-NEXT: fsw fs6, 120(a0) -; ILP32D-NEXT: fsw fs5, 116(a0) -; ILP32D-NEXT: fsw fs4, 112(a0) -; ILP32D-NEXT: fsw fs11, 108(a0) -; ILP32D-NEXT: fsw fs10, 104(a0) -; ILP32D-NEXT: fsw fs9, 100(a0) -; ILP32D-NEXT: fsw fs8, 96(a0) -; ILP32D-NEXT: fsw fs3, 92(a0) -; ILP32D-NEXT: fsw fs2, 88(a0) -; ILP32D-NEXT: fsw fs1, 84(a0) -; ILP32D-NEXT: fsw fs0, 80(a0) +; ILP32D-NEXT: flw fs0, 112(a0) +; ILP32D-NEXT: flw fs1, 116(a0) +; ILP32D-NEXT: flw fs2, 120(a0) +; ILP32D-NEXT: flw fs3, 124(a0) +; ILP32D-NEXT: flw fs4, 96(a0) +; ILP32D-NEXT: flw fs5, 100(a0) +; ILP32D-NEXT: flw fs6, 104(a0) +; ILP32D-NEXT: flw fs7, 108(a0) +; ILP32D-NEXT: flw fs8, 80(a0) +; ILP32D-NEXT: flw fs9, 84(a0) +; ILP32D-NEXT: flw fs10, 88(a0) +; ILP32D-NEXT: flw fs11, 92(a0) +; ILP32D-NEXT: fsw fs3, 124(a0) +; ILP32D-NEXT: fsw fs2, 120(a0) +; ILP32D-NEXT: fsw fs1, 116(a0) +; ILP32D-NEXT: fsw fs0, 112(a0) +; ILP32D-NEXT: fsw fs7, 108(a0) +; ILP32D-NEXT: fsw fs6, 104(a0) +; ILP32D-NEXT: fsw fs5, 100(a0) +; ILP32D-NEXT: fsw fs4, 96(a0) +; ILP32D-NEXT: fsw fs11, 92(a0) +; ILP32D-NEXT: fsw fs10, 88(a0) +; ILP32D-NEXT: fsw fs9, 84(a0) +; ILP32D-NEXT: fsw fs8, 80(a0) ; ILP32D-NEXT: fsw ft11, 76(a0) ; ILP32D-NEXT: fsw ft10, 72(a0) ; ILP32D-NEXT: fsw ft9, 68(a0) @@ -630,30 +630,30 @@ define void @callee() nounwind { ; LP64D-NEXT: flw ft9, 68(a0) ; LP64D-NEXT: flw ft10, 72(a0) ; LP64D-NEXT: flw ft11, 76(a0) -; LP64D-NEXT: flw fs0, 80(a0) -; LP64D-NEXT: flw fs1, 84(a0) -; LP64D-NEXT: flw fs2, 88(a0) -; LP64D-NEXT: flw fs3, 92(a0) -; LP64D-NEXT: flw 
fs4, 112(a0) -; LP64D-NEXT: flw fs5, 116(a0) -; LP64D-NEXT: flw fs6, 120(a0) -; LP64D-NEXT: flw fs7, 124(a0) -; LP64D-NEXT: flw fs8, 96(a0) -; LP64D-NEXT: flw fs9, 100(a0) -; LP64D-NEXT: flw fs10, 104(a0) -; LP64D-NEXT: flw fs11, 108(a0) -; LP64D-NEXT: fsw fs7, 124(a0) -; LP64D-NEXT: fsw fs6, 120(a0) -; LP64D-NEXT: fsw fs5, 116(a0) -; LP64D-NEXT: fsw fs4, 112(a0) -; LP64D-NEXT: fsw fs11, 108(a0) -; LP64D-NEXT: fsw fs10, 104(a0) -; LP64D-NEXT: fsw fs9, 100(a0) -; LP64D-NEXT: fsw fs8, 96(a0) -; LP64D-NEXT: fsw fs3, 92(a0) -; LP64D-NEXT: fsw fs2, 88(a0) -; LP64D-NEXT: fsw fs1, 84(a0) -; LP64D-NEXT: fsw fs0, 80(a0) +; LP64D-NEXT: flw fs0, 112(a0) +; LP64D-NEXT: flw fs1, 116(a0) +; LP64D-NEXT: flw fs2, 120(a0) +; LP64D-NEXT: flw fs3, 124(a0) +; LP64D-NEXT: flw fs4, 96(a0) +; LP64D-NEXT: flw fs5, 100(a0) +; LP64D-NEXT: flw fs6, 104(a0) +; LP64D-NEXT: flw fs7, 108(a0) +; LP64D-NEXT: flw fs8, 80(a0) +; LP64D-NEXT: flw fs9, 84(a0) +; LP64D-NEXT: flw fs10, 88(a0) +; LP64D-NEXT: flw fs11, 92(a0) +; LP64D-NEXT: fsw fs3, 124(a0) +; LP64D-NEXT: fsw fs2, 120(a0) +; LP64D-NEXT: fsw fs1, 116(a0) +; LP64D-NEXT: fsw fs0, 112(a0) +; LP64D-NEXT: fsw fs7, 108(a0) +; LP64D-NEXT: fsw fs6, 104(a0) +; LP64D-NEXT: fsw fs5, 100(a0) +; LP64D-NEXT: fsw fs4, 96(a0) +; LP64D-NEXT: fsw fs11, 92(a0) +; LP64D-NEXT: fsw fs10, 88(a0) +; LP64D-NEXT: fsw fs9, 84(a0) +; LP64D-NEXT: fsw fs8, 80(a0) ; LP64D-NEXT: fsw ft11, 76(a0) ; LP64D-NEXT: fsw ft10, 72(a0) ; LP64D-NEXT: fsw ft9, 68(a0) diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll index 5820b29f73c6d..655c59d5bdabb 100644 --- a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll @@ -43,30 +43,30 @@ define void @callee() nounwind { ; ILP32-NEXT: fld ft9, 136(a0) ; ILP32-NEXT: fld ft10, 144(a0) ; ILP32-NEXT: fld ft11, 152(a0) -; ILP32-NEXT: fld fs0, 160(a0) -; ILP32-NEXT: fld fs1, 168(a0) -; ILP32-NEXT: fld fs2, 176(a0) -; ILP32-NEXT: fld fs3, 
184(a0) -; ILP32-NEXT: fld fs4, 224(a0) -; ILP32-NEXT: fld fs5, 232(a0) -; ILP32-NEXT: fld fs6, 240(a0) -; ILP32-NEXT: fld fs7, 248(a0) -; ILP32-NEXT: fld fs8, 192(a0) -; ILP32-NEXT: fld fs9, 200(a0) -; ILP32-NEXT: fld fs10, 208(a0) -; ILP32-NEXT: fld fs11, 216(a0) -; ILP32-NEXT: fsd fs7, 248(a0) -; ILP32-NEXT: fsd fs6, 240(a0) -; ILP32-NEXT: fsd fs5, 232(a0) -; ILP32-NEXT: fsd fs4, 224(a0) -; ILP32-NEXT: fsd fs11, 216(a0) -; ILP32-NEXT: fsd fs10, 208(a0) -; ILP32-NEXT: fsd fs9, 200(a0) -; ILP32-NEXT: fsd fs8, 192(a0) -; ILP32-NEXT: fsd fs3, 184(a0) -; ILP32-NEXT: fsd fs2, 176(a0) -; ILP32-NEXT: fsd fs1, 168(a0) -; ILP32-NEXT: fsd fs0, 160(a0) +; ILP32-NEXT: fld fs0, 224(a0) +; ILP32-NEXT: fld fs1, 232(a0) +; ILP32-NEXT: fld fs2, 240(a0) +; ILP32-NEXT: fld fs3, 248(a0) +; ILP32-NEXT: fld fs4, 192(a0) +; ILP32-NEXT: fld fs5, 200(a0) +; ILP32-NEXT: fld fs6, 208(a0) +; ILP32-NEXT: fld fs7, 216(a0) +; ILP32-NEXT: fld fs8, 160(a0) +; ILP32-NEXT: fld fs9, 168(a0) +; ILP32-NEXT: fld fs10, 176(a0) +; ILP32-NEXT: fld fs11, 184(a0) +; ILP32-NEXT: fsd fs3, 248(a0) +; ILP32-NEXT: fsd fs2, 240(a0) +; ILP32-NEXT: fsd fs1, 232(a0) +; ILP32-NEXT: fsd fs0, 224(a0) +; ILP32-NEXT: fsd fs7, 216(a0) +; ILP32-NEXT: fsd fs6, 208(a0) +; ILP32-NEXT: fsd fs5, 200(a0) +; ILP32-NEXT: fsd fs4, 192(a0) +; ILP32-NEXT: fsd fs11, 184(a0) +; ILP32-NEXT: fsd fs10, 176(a0) +; ILP32-NEXT: fsd fs9, 168(a0) +; ILP32-NEXT: fsd fs8, 160(a0) ; ILP32-NEXT: fsd ft11, 152(a0) ; ILP32-NEXT: fsd ft10, 144(a0) ; ILP32-NEXT: fsd ft9, 136(a0) @@ -113,30 +113,30 @@ define void @callee() nounwind { ; LP64-NEXT: fld ft9, 136(a0) ; LP64-NEXT: fld ft10, 144(a0) ; LP64-NEXT: fld ft11, 152(a0) -; LP64-NEXT: fld fs0, 160(a0) -; LP64-NEXT: fld fs1, 168(a0) -; LP64-NEXT: fld fs2, 176(a0) -; LP64-NEXT: fld fs3, 184(a0) -; LP64-NEXT: fld fs4, 224(a0) -; LP64-NEXT: fld fs5, 232(a0) -; LP64-NEXT: fld fs6, 240(a0) -; LP64-NEXT: fld fs7, 248(a0) -; LP64-NEXT: fld fs8, 192(a0) -; LP64-NEXT: fld fs9, 200(a0) -; LP64-NEXT: fld fs10, 
208(a0) -; LP64-NEXT: fld fs11, 216(a0) -; LP64-NEXT: fsd fs7, 248(a0) -; LP64-NEXT: fsd fs6, 240(a0) -; LP64-NEXT: fsd fs5, 232(a0) -; LP64-NEXT: fsd fs4, 224(a0) -; LP64-NEXT: fsd fs11, 216(a0) -; LP64-NEXT: fsd fs10, 208(a0) -; LP64-NEXT: fsd fs9, 200(a0) -; LP64-NEXT: fsd fs8, 192(a0) -; LP64-NEXT: fsd fs3, 184(a0) -; LP64-NEXT: fsd fs2, 176(a0) -; LP64-NEXT: fsd fs1, 168(a0) -; LP64-NEXT: fsd fs0, 160(a0) +; LP64-NEXT: fld fs0, 224(a0) +; LP64-NEXT: fld fs1, 232(a0) +; LP64-NEXT: fld fs2, 240(a0) +; LP64-NEXT: fld fs3, 248(a0) +; LP64-NEXT: fld fs4, 192(a0) +; LP64-NEXT: fld fs5, 200(a0) +; LP64-NEXT: fld fs6, 208(a0) +; LP64-NEXT: fld fs7, 216(a0) +; LP64-NEXT: fld fs8, 160(a0) +; LP64-NEXT: fld fs9, 168(a0) +; LP64-NEXT: fld fs10, 176(a0) +; LP64-NEXT: fld fs11, 184(a0) +; LP64-NEXT: fsd fs3, 248(a0) +; LP64-NEXT: fsd fs2, 240(a0) +; LP64-NEXT: fsd fs1, 232(a0) +; LP64-NEXT: fsd fs0, 224(a0) +; LP64-NEXT: fsd fs7, 216(a0) +; LP64-NEXT: fsd fs6, 208(a0) +; LP64-NEXT: fsd fs5, 200(a0) +; LP64-NEXT: fsd fs4, 192(a0) +; LP64-NEXT: fsd fs11, 184(a0) +; LP64-NEXT: fsd fs10, 176(a0) +; LP64-NEXT: fsd fs9, 168(a0) +; LP64-NEXT: fsd fs8, 160(a0) ; LP64-NEXT: fsd ft11, 152(a0) ; LP64-NEXT: fsd ft10, 144(a0) ; LP64-NEXT: fsd ft9, 136(a0) @@ -183,30 +183,30 @@ define void @callee() nounwind { ; LP64E-NEXT: fld ft9, 136(a0) ; LP64E-NEXT: fld ft10, 144(a0) ; LP64E-NEXT: fld ft11, 152(a0) -; LP64E-NEXT: fld fs0, 160(a0) -; LP64E-NEXT: fld fs1, 168(a0) -; LP64E-NEXT: fld fs2, 176(a0) -; LP64E-NEXT: fld fs3, 184(a0) -; LP64E-NEXT: fld fs4, 224(a0) -; LP64E-NEXT: fld fs5, 232(a0) -; LP64E-NEXT: fld fs6, 240(a0) -; LP64E-NEXT: fld fs7, 248(a0) -; LP64E-NEXT: fld fs8, 192(a0) -; LP64E-NEXT: fld fs9, 200(a0) -; LP64E-NEXT: fld fs10, 208(a0) -; LP64E-NEXT: fld fs11, 216(a0) -; LP64E-NEXT: fsd fs7, 248(a0) -; LP64E-NEXT: fsd fs6, 240(a0) -; LP64E-NEXT: fsd fs5, 232(a0) -; LP64E-NEXT: fsd fs4, 224(a0) -; LP64E-NEXT: fsd fs11, 216(a0) -; LP64E-NEXT: fsd fs10, 208(a0) -; LP64E-NEXT: 
fsd fs9, 200(a0) -; LP64E-NEXT: fsd fs8, 192(a0) -; LP64E-NEXT: fsd fs3, 184(a0) -; LP64E-NEXT: fsd fs2, 176(a0) -; LP64E-NEXT: fsd fs1, 168(a0) -; LP64E-NEXT: fsd fs0, 160(a0) +; LP64E-NEXT: fld fs0, 224(a0) +; LP64E-NEXT: fld fs1, 232(a0) +; LP64E-NEXT: fld fs2, 240(a0) +; LP64E-NEXT: fld fs3, 248(a0) +; LP64E-NEXT: fld fs4, 192(a0) +; LP64E-NEXT: fld fs5, 200(a0) +; LP64E-NEXT: fld fs6, 208(a0) +; LP64E-NEXT: fld fs7, 216(a0) +; LP64E-NEXT: fld fs8, 160(a0) +; LP64E-NEXT: fld fs9, 168(a0) +; LP64E-NEXT: fld fs10, 176(a0) +; LP64E-NEXT: fld fs11, 184(a0) +; LP64E-NEXT: fsd fs3, 248(a0) +; LP64E-NEXT: fsd fs2, 240(a0) +; LP64E-NEXT: fsd fs1, 232(a0) +; LP64E-NEXT: fsd fs0, 224(a0) +; LP64E-NEXT: fsd fs7, 216(a0) +; LP64E-NEXT: fsd fs6, 208(a0) +; LP64E-NEXT: fsd fs5, 200(a0) +; LP64E-NEXT: fsd fs4, 192(a0) +; LP64E-NEXT: fsd fs11, 184(a0) +; LP64E-NEXT: fsd fs10, 176(a0) +; LP64E-NEXT: fsd fs9, 168(a0) +; LP64E-NEXT: fsd fs8, 160(a0) ; LP64E-NEXT: fsd ft11, 152(a0) ; LP64E-NEXT: fsd ft10, 144(a0) ; LP64E-NEXT: fsd ft9, 136(a0) @@ -266,30 +266,30 @@ define void @callee() nounwind { ; ILP32D-NEXT: fld ft9, 136(a0) ; ILP32D-NEXT: fld ft10, 144(a0) ; ILP32D-NEXT: fld ft11, 152(a0) -; ILP32D-NEXT: fld fs0, 160(a0) -; ILP32D-NEXT: fld fs1, 168(a0) -; ILP32D-NEXT: fld fs2, 176(a0) -; ILP32D-NEXT: fld fs3, 184(a0) -; ILP32D-NEXT: fld fs4, 224(a0) -; ILP32D-NEXT: fld fs5, 232(a0) -; ILP32D-NEXT: fld fs6, 240(a0) -; ILP32D-NEXT: fld fs7, 248(a0) -; ILP32D-NEXT: fld fs8, 192(a0) -; ILP32D-NEXT: fld fs9, 200(a0) -; ILP32D-NEXT: fld fs10, 208(a0) -; ILP32D-NEXT: fld fs11, 216(a0) -; ILP32D-NEXT: fsd fs7, 248(a0) -; ILP32D-NEXT: fsd fs6, 240(a0) -; ILP32D-NEXT: fsd fs5, 232(a0) -; ILP32D-NEXT: fsd fs4, 224(a0) -; ILP32D-NEXT: fsd fs11, 216(a0) -; ILP32D-NEXT: fsd fs10, 208(a0) -; ILP32D-NEXT: fsd fs9, 200(a0) -; ILP32D-NEXT: fsd fs8, 192(a0) -; ILP32D-NEXT: fsd fs3, 184(a0) -; ILP32D-NEXT: fsd fs2, 176(a0) -; ILP32D-NEXT: fsd fs1, 168(a0) -; ILP32D-NEXT: fsd fs0, 160(a0) +; 
ILP32D-NEXT: fld fs0, 224(a0) +; ILP32D-NEXT: fld fs1, 232(a0) +; ILP32D-NEXT: fld fs2, 240(a0) +; ILP32D-NEXT: fld fs3, 248(a0) +; ILP32D-NEXT: fld fs4, 192(a0) +; ILP32D-NEXT: fld fs5, 200(a0) +; ILP32D-NEXT: fld fs6, 208(a0) +; ILP32D-NEXT: fld fs7, 216(a0) +; ILP32D-NEXT: fld fs8, 160(a0) +; ILP32D-NEXT: fld fs9, 168(a0) +; ILP32D-NEXT: fld fs10, 176(a0) +; ILP32D-NEXT: fld fs11, 184(a0) +; ILP32D-NEXT: fsd fs3, 248(a0) +; ILP32D-NEXT: fsd fs2, 240(a0) +; ILP32D-NEXT: fsd fs1, 232(a0) +; ILP32D-NEXT: fsd fs0, 224(a0) +; ILP32D-NEXT: fsd fs7, 216(a0) +; ILP32D-NEXT: fsd fs6, 208(a0) +; ILP32D-NEXT: fsd fs5, 200(a0) +; ILP32D-NEXT: fsd fs4, 192(a0) +; ILP32D-NEXT: fsd fs11, 184(a0) +; ILP32D-NEXT: fsd fs10, 176(a0) +; ILP32D-NEXT: fsd fs9, 168(a0) +; ILP32D-NEXT: fsd fs8, 160(a0) ; ILP32D-NEXT: fsd ft11, 152(a0) ; ILP32D-NEXT: fsd ft10, 144(a0) ; ILP32D-NEXT: fsd ft9, 136(a0) @@ -362,30 +362,30 @@ define void @callee() nounwind { ; LP64D-NEXT: fld ft9, 136(a0) ; LP64D-NEXT: fld ft10, 144(a0) ; LP64D-NEXT: fld ft11, 152(a0) -; LP64D-NEXT: fld fs0, 160(a0) -; LP64D-NEXT: fld fs1, 168(a0) -; LP64D-NEXT: fld fs2, 176(a0) -; LP64D-NEXT: fld fs3, 184(a0) -; LP64D-NEXT: fld fs4, 224(a0) -; LP64D-NEXT: fld fs5, 232(a0) -; LP64D-NEXT: fld fs6, 240(a0) -; LP64D-NEXT: fld fs7, 248(a0) -; LP64D-NEXT: fld fs8, 192(a0) -; LP64D-NEXT: fld fs9, 200(a0) -; LP64D-NEXT: fld fs10, 208(a0) -; LP64D-NEXT: fld fs11, 216(a0) -; LP64D-NEXT: fsd fs7, 248(a0) -; LP64D-NEXT: fsd fs6, 240(a0) -; LP64D-NEXT: fsd fs5, 232(a0) -; LP64D-NEXT: fsd fs4, 224(a0) -; LP64D-NEXT: fsd fs11, 216(a0) -; LP64D-NEXT: fsd fs10, 208(a0) -; LP64D-NEXT: fsd fs9, 200(a0) -; LP64D-NEXT: fsd fs8, 192(a0) -; LP64D-NEXT: fsd fs3, 184(a0) -; LP64D-NEXT: fsd fs2, 176(a0) -; LP64D-NEXT: fsd fs1, 168(a0) -; LP64D-NEXT: fsd fs0, 160(a0) +; LP64D-NEXT: fld fs0, 224(a0) +; LP64D-NEXT: fld fs1, 232(a0) +; LP64D-NEXT: fld fs2, 240(a0) +; LP64D-NEXT: fld fs3, 248(a0) +; LP64D-NEXT: fld fs4, 192(a0) +; LP64D-NEXT: fld fs5, 
200(a0) +; LP64D-NEXT: fld fs6, 208(a0) +; LP64D-NEXT: fld fs7, 216(a0) +; LP64D-NEXT: fld fs8, 160(a0) +; LP64D-NEXT: fld fs9, 168(a0) +; LP64D-NEXT: fld fs10, 176(a0) +; LP64D-NEXT: fld fs11, 184(a0) +; LP64D-NEXT: fsd fs3, 248(a0) +; LP64D-NEXT: fsd fs2, 240(a0) +; LP64D-NEXT: fsd fs1, 232(a0) +; LP64D-NEXT: fsd fs0, 224(a0) +; LP64D-NEXT: fsd fs7, 216(a0) +; LP64D-NEXT: fsd fs6, 208(a0) +; LP64D-NEXT: fsd fs5, 200(a0) +; LP64D-NEXT: fsd fs4, 192(a0) +; LP64D-NEXT: fsd fs11, 184(a0) +; LP64D-NEXT: fsd fs10, 176(a0) +; LP64D-NEXT: fsd fs9, 168(a0) +; LP64D-NEXT: fsd fs8, 160(a0) ; LP64D-NEXT: fsd ft11, 152(a0) ; LP64D-NEXT: fsd ft10, 144(a0) ; LP64D-NEXT: fsd ft9, 136(a0) diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll index 8db4c715c41ce..2b381fa3bec4c 100644 --- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll @@ -95,30 +95,30 @@ define void @callee() { ; RV32I-NEXT: lw s3, 68(a0) ; RV32I-NEXT: lw s4, 72(a0) ; RV32I-NEXT: lw s5, 76(a0) -; RV32I-NEXT: lw s6, 80(a0) -; RV32I-NEXT: lw s7, 84(a0) -; RV32I-NEXT: lw s8, 88(a0) -; RV32I-NEXT: lw s9, 92(a0) -; RV32I-NEXT: lw s10, 112(a0) -; RV32I-NEXT: lw s11, 116(a0) -; RV32I-NEXT: lw ra, 120(a0) -; RV32I-NEXT: lw a1, 124(a0) -; RV32I-NEXT: lw a5, 96(a0) -; RV32I-NEXT: lw a4, 100(a0) -; RV32I-NEXT: lw a3, 104(a0) -; RV32I-NEXT: lw a2, 108(a0) -; RV32I-NEXT: sw a1, 124(a0) -; RV32I-NEXT: sw ra, 120(a0) -; RV32I-NEXT: sw s11, 116(a0) -; RV32I-NEXT: sw s10, 112(a0) -; RV32I-NEXT: sw a2, 108(a0) -; RV32I-NEXT: sw a3, 104(a0) -; RV32I-NEXT: sw a4, 100(a0) -; RV32I-NEXT: sw a5, 96(a0) -; RV32I-NEXT: sw s9, 92(a0) -; RV32I-NEXT: sw s8, 88(a0) -; RV32I-NEXT: sw s7, 84(a0) -; RV32I-NEXT: sw s6, 80(a0) +; RV32I-NEXT: lw s6, 112(a0) +; RV32I-NEXT: lw s7, 116(a0) +; RV32I-NEXT: lw s8, 120(a0) +; RV32I-NEXT: lw s9, 124(a0) +; RV32I-NEXT: lw s10, 96(a0) +; RV32I-NEXT: lw s11, 100(a0) +; RV32I-NEXT: lw ra, 104(a0) +; RV32I-NEXT: 
lw a1, 108(a0) +; RV32I-NEXT: lw a5, 80(a0) +; RV32I-NEXT: lw a4, 84(a0) +; RV32I-NEXT: lw a3, 88(a0) +; RV32I-NEXT: lw a2, 92(a0) +; RV32I-NEXT: sw s9, 124(a0) +; RV32I-NEXT: sw s8, 120(a0) +; RV32I-NEXT: sw s7, 116(a0) +; RV32I-NEXT: sw s6, 112(a0) +; RV32I-NEXT: sw a1, 108(a0) +; RV32I-NEXT: sw ra, 104(a0) +; RV32I-NEXT: sw s11, 100(a0) +; RV32I-NEXT: sw s10, 96(a0) +; RV32I-NEXT: sw a2, 92(a0) +; RV32I-NEXT: sw a3, 88(a0) +; RV32I-NEXT: sw a4, 84(a0) +; RV32I-NEXT: sw a5, 80(a0) ; RV32I-NEXT: sw s5, 76(a0) ; RV32I-NEXT: sw s4, 72(a0) ; RV32I-NEXT: sw s3, 68(a0) @@ -211,30 +211,30 @@ define void @callee() { ; RV32I-ILP32E-NEXT: lw s5, 68(a0) ; RV32I-ILP32E-NEXT: lw s6, 72(a0) ; RV32I-ILP32E-NEXT: lw s7, 76(a0) -; RV32I-ILP32E-NEXT: lw s8, 80(a0) -; RV32I-ILP32E-NEXT: lw s9, 84(a0) -; RV32I-ILP32E-NEXT: lw s10, 88(a0) -; RV32I-ILP32E-NEXT: lw s11, 92(a0) -; RV32I-ILP32E-NEXT: lw s0, 112(a0) -; RV32I-ILP32E-NEXT: lw s1, 116(a0) -; RV32I-ILP32E-NEXT: lw ra, 120(a0) -; RV32I-ILP32E-NEXT: lw a1, 124(a0) -; RV32I-ILP32E-NEXT: lw a5, 96(a0) -; RV32I-ILP32E-NEXT: lw a4, 100(a0) -; RV32I-ILP32E-NEXT: lw a3, 104(a0) -; RV32I-ILP32E-NEXT: lw a2, 108(a0) -; RV32I-ILP32E-NEXT: sw a1, 124(a0) -; RV32I-ILP32E-NEXT: sw ra, 120(a0) -; RV32I-ILP32E-NEXT: sw s1, 116(a0) -; RV32I-ILP32E-NEXT: sw s0, 112(a0) -; RV32I-ILP32E-NEXT: sw a2, 108(a0) -; RV32I-ILP32E-NEXT: sw a3, 104(a0) -; RV32I-ILP32E-NEXT: sw a4, 100(a0) -; RV32I-ILP32E-NEXT: sw a5, 96(a0) -; RV32I-ILP32E-NEXT: sw s11, 92(a0) -; RV32I-ILP32E-NEXT: sw s10, 88(a0) -; RV32I-ILP32E-NEXT: sw s9, 84(a0) -; RV32I-ILP32E-NEXT: sw s8, 80(a0) +; RV32I-ILP32E-NEXT: lw s8, 112(a0) +; RV32I-ILP32E-NEXT: lw s9, 116(a0) +; RV32I-ILP32E-NEXT: lw s10, 120(a0) +; RV32I-ILP32E-NEXT: lw s11, 124(a0) +; RV32I-ILP32E-NEXT: lw s0, 96(a0) +; RV32I-ILP32E-NEXT: lw s1, 100(a0) +; RV32I-ILP32E-NEXT: lw ra, 104(a0) +; RV32I-ILP32E-NEXT: lw a1, 108(a0) +; RV32I-ILP32E-NEXT: lw a5, 80(a0) +; RV32I-ILP32E-NEXT: lw a4, 84(a0) +; RV32I-ILP32E-NEXT: lw 
a3, 88(a0) +; RV32I-ILP32E-NEXT: lw a2, 92(a0) +; RV32I-ILP32E-NEXT: sw s11, 124(a0) +; RV32I-ILP32E-NEXT: sw s10, 120(a0) +; RV32I-ILP32E-NEXT: sw s9, 116(a0) +; RV32I-ILP32E-NEXT: sw s8, 112(a0) +; RV32I-ILP32E-NEXT: sw a1, 108(a0) +; RV32I-ILP32E-NEXT: sw ra, 104(a0) +; RV32I-ILP32E-NEXT: sw s1, 100(a0) +; RV32I-ILP32E-NEXT: sw s0, 96(a0) +; RV32I-ILP32E-NEXT: sw a2, 92(a0) +; RV32I-ILP32E-NEXT: sw a3, 88(a0) +; RV32I-ILP32E-NEXT: sw a4, 84(a0) +; RV32I-ILP32E-NEXT: sw a5, 80(a0) ; RV32I-ILP32E-NEXT: sw s7, 76(a0) ; RV32I-ILP32E-NEXT: sw s6, 72(a0) ; RV32I-ILP32E-NEXT: sw s5, 68(a0) @@ -330,30 +330,30 @@ define void @callee() { ; RV32I-WITH-FP-NEXT: lw s4, 68(a0) ; RV32I-WITH-FP-NEXT: lw s5, 72(a0) ; RV32I-WITH-FP-NEXT: lw s6, 76(a0) -; RV32I-WITH-FP-NEXT: lw s7, 80(a0) -; RV32I-WITH-FP-NEXT: lw s8, 84(a0) -; RV32I-WITH-FP-NEXT: lw s9, 88(a0) -; RV32I-WITH-FP-NEXT: lw s10, 92(a0) -; RV32I-WITH-FP-NEXT: lw s11, 112(a0) -; RV32I-WITH-FP-NEXT: lw ra, 116(a0) -; RV32I-WITH-FP-NEXT: lw a4, 120(a0) -; RV32I-WITH-FP-NEXT: lw a1, 124(a0) -; RV32I-WITH-FP-NEXT: lw a6, 96(a0) -; RV32I-WITH-FP-NEXT: lw a5, 100(a0) -; RV32I-WITH-FP-NEXT: lw a3, 104(a0) -; RV32I-WITH-FP-NEXT: lw a2, 108(a0) -; RV32I-WITH-FP-NEXT: sw a1, 124(a0) -; RV32I-WITH-FP-NEXT: sw a4, 120(a0) -; RV32I-WITH-FP-NEXT: sw ra, 116(a0) -; RV32I-WITH-FP-NEXT: sw s11, 112(a0) -; RV32I-WITH-FP-NEXT: sw a2, 108(a0) -; RV32I-WITH-FP-NEXT: sw a3, 104(a0) -; RV32I-WITH-FP-NEXT: sw a5, 100(a0) -; RV32I-WITH-FP-NEXT: sw a6, 96(a0) -; RV32I-WITH-FP-NEXT: sw s10, 92(a0) -; RV32I-WITH-FP-NEXT: sw s9, 88(a0) -; RV32I-WITH-FP-NEXT: sw s8, 84(a0) -; RV32I-WITH-FP-NEXT: sw s7, 80(a0) +; RV32I-WITH-FP-NEXT: lw s7, 112(a0) +; RV32I-WITH-FP-NEXT: lw s8, 116(a0) +; RV32I-WITH-FP-NEXT: lw s9, 120(a0) +; RV32I-WITH-FP-NEXT: lw s10, 124(a0) +; RV32I-WITH-FP-NEXT: lw s11, 96(a0) +; RV32I-WITH-FP-NEXT: lw ra, 100(a0) +; RV32I-WITH-FP-NEXT: lw a4, 104(a0) +; RV32I-WITH-FP-NEXT: lw a1, 108(a0) +; RV32I-WITH-FP-NEXT: lw a6, 80(a0) +; 
RV32I-WITH-FP-NEXT: lw a5, 84(a0) +; RV32I-WITH-FP-NEXT: lw a3, 88(a0) +; RV32I-WITH-FP-NEXT: lw a2, 92(a0) +; RV32I-WITH-FP-NEXT: sw s10, 124(a0) +; RV32I-WITH-FP-NEXT: sw s9, 120(a0) +; RV32I-WITH-FP-NEXT: sw s8, 116(a0) +; RV32I-WITH-FP-NEXT: sw s7, 112(a0) +; RV32I-WITH-FP-NEXT: sw a1, 108(a0) +; RV32I-WITH-FP-NEXT: sw a4, 104(a0) +; RV32I-WITH-FP-NEXT: sw ra, 100(a0) +; RV32I-WITH-FP-NEXT: sw s11, 96(a0) +; RV32I-WITH-FP-NEXT: sw a2, 92(a0) +; RV32I-WITH-FP-NEXT: sw a3, 88(a0) +; RV32I-WITH-FP-NEXT: sw a5, 84(a0) +; RV32I-WITH-FP-NEXT: sw a6, 80(a0) ; RV32I-WITH-FP-NEXT: sw s6, 76(a0) ; RV32I-WITH-FP-NEXT: sw s5, 72(a0) ; RV32I-WITH-FP-NEXT: sw s4, 68(a0) @@ -455,29 +455,29 @@ define void @callee() { ; RV32IZCMP-NEXT: lw s10, 68(a0) ; RV32IZCMP-NEXT: lw s11, 72(a0) ; RV32IZCMP-NEXT: lw ra, 76(a0) -; RV32IZCMP-NEXT: lw t2, 80(a0) -; RV32IZCMP-NEXT: lw s0, 84(a0) -; RV32IZCMP-NEXT: lw s1, 88(a0) -; RV32IZCMP-NEXT: lw t1, 92(a0) ; RV32IZCMP-NEXT: lw t0, 112(a0) -; RV32IZCMP-NEXT: lw a5, 116(a0) -; RV32IZCMP-NEXT: lw a3, 120(a0) -; RV32IZCMP-NEXT: lw a1, 124(a0) -; RV32IZCMP-NEXT: lw a7, 96(a0) +; RV32IZCMP-NEXT: lw s0, 116(a0) +; RV32IZCMP-NEXT: lw s1, 120(a0) +; RV32IZCMP-NEXT: lw a5, 124(a0) +; RV32IZCMP-NEXT: lw t1, 96(a0) ; RV32IZCMP-NEXT: lw a6, 100(a0) -; RV32IZCMP-NEXT: lw a4, 104(a0) -; RV32IZCMP-NEXT: lw a2, 108(a0) -; RV32IZCMP-NEXT: sw a1, 124(a0) -; RV32IZCMP-NEXT: sw a3, 120(a0) -; RV32IZCMP-NEXT: sw a5, 116(a0) +; RV32IZCMP-NEXT: lw a3, 104(a0) +; RV32IZCMP-NEXT: lw a1, 108(a0) +; RV32IZCMP-NEXT: lw t2, 80(a0) +; RV32IZCMP-NEXT: lw a7, 84(a0) +; RV32IZCMP-NEXT: lw a4, 88(a0) +; RV32IZCMP-NEXT: lw a2, 92(a0) +; RV32IZCMP-NEXT: sw a5, 124(a0) +; RV32IZCMP-NEXT: sw s1, 120(a0) +; RV32IZCMP-NEXT: sw s0, 116(a0) ; RV32IZCMP-NEXT: sw t0, 112(a0) -; RV32IZCMP-NEXT: sw a2, 108(a0) -; RV32IZCMP-NEXT: sw a4, 104(a0) +; RV32IZCMP-NEXT: sw a1, 108(a0) +; RV32IZCMP-NEXT: sw a3, 104(a0) ; RV32IZCMP-NEXT: sw a6, 100(a0) -; RV32IZCMP-NEXT: sw a7, 96(a0) -; 
RV32IZCMP-NEXT: sw t1, 92(a0) -; RV32IZCMP-NEXT: sw s1, 88(a0) -; RV32IZCMP-NEXT: sw s0, 84(a0) +; RV32IZCMP-NEXT: sw t1, 96(a0) +; RV32IZCMP-NEXT: sw a2, 92(a0) +; RV32IZCMP-NEXT: sw a4, 88(a0) +; RV32IZCMP-NEXT: sw a7, 84(a0) ; RV32IZCMP-NEXT: sw t2, 80(a0) ; RV32IZCMP-NEXT: sw ra, 76(a0) ; RV32IZCMP-NEXT: sw s11, 72(a0) @@ -566,30 +566,30 @@ define void @callee() { ; RV32IZCMP-WITH-FP-NEXT: lw s10, 68(a0) ; RV32IZCMP-WITH-FP-NEXT: lw s11, 72(a0) ; RV32IZCMP-WITH-FP-NEXT: lw ra, 76(a0) -; RV32IZCMP-WITH-FP-NEXT: lw s1, 80(a0) -; RV32IZCMP-WITH-FP-NEXT: lw t3, 84(a0) -; RV32IZCMP-WITH-FP-NEXT: lw t2, 88(a0) -; RV32IZCMP-WITH-FP-NEXT: lw t1, 92(a0) -; RV32IZCMP-WITH-FP-NEXT: lw t0, 112(a0) -; RV32IZCMP-WITH-FP-NEXT: lw a6, 116(a0) +; RV32IZCMP-WITH-FP-NEXT: lw s1, 112(a0) +; RV32IZCMP-WITH-FP-NEXT: lw t0, 116(a0) ; RV32IZCMP-WITH-FP-NEXT: lw a4, 120(a0) -; RV32IZCMP-WITH-FP-NEXT: lw a1, 124(a0) -; RV32IZCMP-WITH-FP-NEXT: lw a7, 96(a0) -; RV32IZCMP-WITH-FP-NEXT: lw a5, 100(a0) -; RV32IZCMP-WITH-FP-NEXT: lw a3, 104(a0) -; RV32IZCMP-WITH-FP-NEXT: lw a2, 108(a0) -; RV32IZCMP-WITH-FP-NEXT: sw a1, 124(a0) +; RV32IZCMP-WITH-FP-NEXT: lw a2, 124(a0) +; RV32IZCMP-WITH-FP-NEXT: lw t3, 96(a0) +; RV32IZCMP-WITH-FP-NEXT: lw t1, 100(a0) +; RV32IZCMP-WITH-FP-NEXT: lw a6, 104(a0) +; RV32IZCMP-WITH-FP-NEXT: lw a1, 108(a0) +; RV32IZCMP-WITH-FP-NEXT: lw t2, 80(a0) +; RV32IZCMP-WITH-FP-NEXT: lw a7, 84(a0) +; RV32IZCMP-WITH-FP-NEXT: lw a5, 88(a0) +; RV32IZCMP-WITH-FP-NEXT: lw a3, 92(a0) +; RV32IZCMP-WITH-FP-NEXT: sw a2, 124(a0) ; RV32IZCMP-WITH-FP-NEXT: sw a4, 120(a0) -; RV32IZCMP-WITH-FP-NEXT: sw a6, 116(a0) -; RV32IZCMP-WITH-FP-NEXT: sw t0, 112(a0) -; RV32IZCMP-WITH-FP-NEXT: sw a2, 108(a0) -; RV32IZCMP-WITH-FP-NEXT: sw a3, 104(a0) -; RV32IZCMP-WITH-FP-NEXT: sw a5, 100(a0) -; RV32IZCMP-WITH-FP-NEXT: sw a7, 96(a0) -; RV32IZCMP-WITH-FP-NEXT: sw t1, 92(a0) -; RV32IZCMP-WITH-FP-NEXT: sw t2, 88(a0) -; RV32IZCMP-WITH-FP-NEXT: sw t3, 84(a0) -; RV32IZCMP-WITH-FP-NEXT: sw s1, 80(a0) +; 
RV32IZCMP-WITH-FP-NEXT: sw t0, 116(a0) +; RV32IZCMP-WITH-FP-NEXT: sw s1, 112(a0) +; RV32IZCMP-WITH-FP-NEXT: sw a1, 108(a0) +; RV32IZCMP-WITH-FP-NEXT: sw a6, 104(a0) +; RV32IZCMP-WITH-FP-NEXT: sw t1, 100(a0) +; RV32IZCMP-WITH-FP-NEXT: sw t3, 96(a0) +; RV32IZCMP-WITH-FP-NEXT: sw a3, 92(a0) +; RV32IZCMP-WITH-FP-NEXT: sw a5, 88(a0) +; RV32IZCMP-WITH-FP-NEXT: sw a7, 84(a0) +; RV32IZCMP-WITH-FP-NEXT: sw t2, 80(a0) ; RV32IZCMP-WITH-FP-NEXT: sw ra, 76(a0) ; RV32IZCMP-WITH-FP-NEXT: sw s11, 72(a0) ; RV32IZCMP-WITH-FP-NEXT: sw s10, 68(a0) @@ -704,30 +704,30 @@ define void @callee() { ; RV64I-NEXT: lw s3, 68(a0) ; RV64I-NEXT: lw s4, 72(a0) ; RV64I-NEXT: lw s5, 76(a0) -; RV64I-NEXT: lw s6, 80(a0) -; RV64I-NEXT: lw s7, 84(a0) -; RV64I-NEXT: lw s8, 88(a0) -; RV64I-NEXT: lw s9, 92(a0) -; RV64I-NEXT: lw s10, 112(a0) -; RV64I-NEXT: lw s11, 116(a0) -; RV64I-NEXT: lw ra, 120(a0) -; RV64I-NEXT: lw a1, 124(a0) -; RV64I-NEXT: lw a5, 96(a0) -; RV64I-NEXT: lw a4, 100(a0) -; RV64I-NEXT: lw a3, 104(a0) -; RV64I-NEXT: lw a2, 108(a0) -; RV64I-NEXT: sw a1, 124(a0) -; RV64I-NEXT: sw ra, 120(a0) -; RV64I-NEXT: sw s11, 116(a0) -; RV64I-NEXT: sw s10, 112(a0) -; RV64I-NEXT: sw a2, 108(a0) -; RV64I-NEXT: sw a3, 104(a0) -; RV64I-NEXT: sw a4, 100(a0) -; RV64I-NEXT: sw a5, 96(a0) -; RV64I-NEXT: sw s9, 92(a0) -; RV64I-NEXT: sw s8, 88(a0) -; RV64I-NEXT: sw s7, 84(a0) -; RV64I-NEXT: sw s6, 80(a0) +; RV64I-NEXT: lw s6, 112(a0) +; RV64I-NEXT: lw s7, 116(a0) +; RV64I-NEXT: lw s8, 120(a0) +; RV64I-NEXT: lw s9, 124(a0) +; RV64I-NEXT: lw s10, 96(a0) +; RV64I-NEXT: lw s11, 100(a0) +; RV64I-NEXT: lw ra, 104(a0) +; RV64I-NEXT: lw a1, 108(a0) +; RV64I-NEXT: lw a5, 80(a0) +; RV64I-NEXT: lw a4, 84(a0) +; RV64I-NEXT: lw a3, 88(a0) +; RV64I-NEXT: lw a2, 92(a0) +; RV64I-NEXT: sw s9, 124(a0) +; RV64I-NEXT: sw s8, 120(a0) +; RV64I-NEXT: sw s7, 116(a0) +; RV64I-NEXT: sw s6, 112(a0) +; RV64I-NEXT: sw a1, 108(a0) +; RV64I-NEXT: sw ra, 104(a0) +; RV64I-NEXT: sw s11, 100(a0) +; RV64I-NEXT: sw s10, 96(a0) +; RV64I-NEXT: sw a2, 
92(a0) +; RV64I-NEXT: sw a3, 88(a0) +; RV64I-NEXT: sw a4, 84(a0) +; RV64I-NEXT: sw a5, 80(a0) ; RV64I-NEXT: sw s5, 76(a0) ; RV64I-NEXT: sw s4, 72(a0) ; RV64I-NEXT: sw s3, 68(a0) @@ -820,30 +820,30 @@ define void @callee() { ; RV64I-LP64E-NEXT: lw s5, 68(a0) ; RV64I-LP64E-NEXT: lw s6, 72(a0) ; RV64I-LP64E-NEXT: lw s7, 76(a0) -; RV64I-LP64E-NEXT: lw s8, 80(a0) -; RV64I-LP64E-NEXT: lw s9, 84(a0) -; RV64I-LP64E-NEXT: lw s10, 88(a0) -; RV64I-LP64E-NEXT: lw s11, 92(a0) -; RV64I-LP64E-NEXT: lw s0, 112(a0) -; RV64I-LP64E-NEXT: lw s1, 116(a0) -; RV64I-LP64E-NEXT: lw ra, 120(a0) -; RV64I-LP64E-NEXT: lw a1, 124(a0) -; RV64I-LP64E-NEXT: lw a5, 96(a0) -; RV64I-LP64E-NEXT: lw a4, 100(a0) -; RV64I-LP64E-NEXT: lw a3, 104(a0) -; RV64I-LP64E-NEXT: lw a2, 108(a0) -; RV64I-LP64E-NEXT: sw a1, 124(a0) -; RV64I-LP64E-NEXT: sw ra, 120(a0) -; RV64I-LP64E-NEXT: sw s1, 116(a0) -; RV64I-LP64E-NEXT: sw s0, 112(a0) -; RV64I-LP64E-NEXT: sw a2, 108(a0) -; RV64I-LP64E-NEXT: sw a3, 104(a0) -; RV64I-LP64E-NEXT: sw a4, 100(a0) -; RV64I-LP64E-NEXT: sw a5, 96(a0) -; RV64I-LP64E-NEXT: sw s11, 92(a0) -; RV64I-LP64E-NEXT: sw s10, 88(a0) -; RV64I-LP64E-NEXT: sw s9, 84(a0) -; RV64I-LP64E-NEXT: sw s8, 80(a0) +; RV64I-LP64E-NEXT: lw s8, 112(a0) +; RV64I-LP64E-NEXT: lw s9, 116(a0) +; RV64I-LP64E-NEXT: lw s10, 120(a0) +; RV64I-LP64E-NEXT: lw s11, 124(a0) +; RV64I-LP64E-NEXT: lw s0, 96(a0) +; RV64I-LP64E-NEXT: lw s1, 100(a0) +; RV64I-LP64E-NEXT: lw ra, 104(a0) +; RV64I-LP64E-NEXT: lw a1, 108(a0) +; RV64I-LP64E-NEXT: lw a5, 80(a0) +; RV64I-LP64E-NEXT: lw a4, 84(a0) +; RV64I-LP64E-NEXT: lw a3, 88(a0) +; RV64I-LP64E-NEXT: lw a2, 92(a0) +; RV64I-LP64E-NEXT: sw s11, 124(a0) +; RV64I-LP64E-NEXT: sw s10, 120(a0) +; RV64I-LP64E-NEXT: sw s9, 116(a0) +; RV64I-LP64E-NEXT: sw s8, 112(a0) +; RV64I-LP64E-NEXT: sw a1, 108(a0) +; RV64I-LP64E-NEXT: sw ra, 104(a0) +; RV64I-LP64E-NEXT: sw s1, 100(a0) +; RV64I-LP64E-NEXT: sw s0, 96(a0) +; RV64I-LP64E-NEXT: sw a2, 92(a0) +; RV64I-LP64E-NEXT: sw a3, 88(a0) +; RV64I-LP64E-NEXT: sw a4, 
84(a0) +; RV64I-LP64E-NEXT: sw a5, 80(a0) ; RV64I-LP64E-NEXT: sw s7, 76(a0) ; RV64I-LP64E-NEXT: sw s6, 72(a0) ; RV64I-LP64E-NEXT: sw s5, 68(a0) @@ -939,30 +939,30 @@ define void @callee() { ; RV64I-WITH-FP-NEXT: lw s4, 68(a0) ; RV64I-WITH-FP-NEXT: lw s5, 72(a0) ; RV64I-WITH-FP-NEXT: lw s6, 76(a0) -; RV64I-WITH-FP-NEXT: lw s7, 80(a0) -; RV64I-WITH-FP-NEXT: lw s8, 84(a0) -; RV64I-WITH-FP-NEXT: lw s9, 88(a0) -; RV64I-WITH-FP-NEXT: lw s10, 92(a0) -; RV64I-WITH-FP-NEXT: lw s11, 112(a0) -; RV64I-WITH-FP-NEXT: lw ra, 116(a0) -; RV64I-WITH-FP-NEXT: lw a4, 120(a0) -; RV64I-WITH-FP-NEXT: lw a1, 124(a0) -; RV64I-WITH-FP-NEXT: lw a6, 96(a0) -; RV64I-WITH-FP-NEXT: lw a5, 100(a0) -; RV64I-WITH-FP-NEXT: lw a3, 104(a0) -; RV64I-WITH-FP-NEXT: lw a2, 108(a0) -; RV64I-WITH-FP-NEXT: sw a1, 124(a0) -; RV64I-WITH-FP-NEXT: sw a4, 120(a0) -; RV64I-WITH-FP-NEXT: sw ra, 116(a0) -; RV64I-WITH-FP-NEXT: sw s11, 112(a0) -; RV64I-WITH-FP-NEXT: sw a2, 108(a0) -; RV64I-WITH-FP-NEXT: sw a3, 104(a0) -; RV64I-WITH-FP-NEXT: sw a5, 100(a0) -; RV64I-WITH-FP-NEXT: sw a6, 96(a0) -; RV64I-WITH-FP-NEXT: sw s10, 92(a0) -; RV64I-WITH-FP-NEXT: sw s9, 88(a0) -; RV64I-WITH-FP-NEXT: sw s8, 84(a0) -; RV64I-WITH-FP-NEXT: sw s7, 80(a0) +; RV64I-WITH-FP-NEXT: lw s7, 112(a0) +; RV64I-WITH-FP-NEXT: lw s8, 116(a0) +; RV64I-WITH-FP-NEXT: lw s9, 120(a0) +; RV64I-WITH-FP-NEXT: lw s10, 124(a0) +; RV64I-WITH-FP-NEXT: lw s11, 96(a0) +; RV64I-WITH-FP-NEXT: lw ra, 100(a0) +; RV64I-WITH-FP-NEXT: lw a4, 104(a0) +; RV64I-WITH-FP-NEXT: lw a1, 108(a0) +; RV64I-WITH-FP-NEXT: lw a6, 80(a0) +; RV64I-WITH-FP-NEXT: lw a5, 84(a0) +; RV64I-WITH-FP-NEXT: lw a3, 88(a0) +; RV64I-WITH-FP-NEXT: lw a2, 92(a0) +; RV64I-WITH-FP-NEXT: sw s10, 124(a0) +; RV64I-WITH-FP-NEXT: sw s9, 120(a0) +; RV64I-WITH-FP-NEXT: sw s8, 116(a0) +; RV64I-WITH-FP-NEXT: sw s7, 112(a0) +; RV64I-WITH-FP-NEXT: sw a1, 108(a0) +; RV64I-WITH-FP-NEXT: sw a4, 104(a0) +; RV64I-WITH-FP-NEXT: sw ra, 100(a0) +; RV64I-WITH-FP-NEXT: sw s11, 96(a0) +; RV64I-WITH-FP-NEXT: sw a2, 92(a0) 
+; RV64I-WITH-FP-NEXT: sw a3, 88(a0) +; RV64I-WITH-FP-NEXT: sw a5, 84(a0) +; RV64I-WITH-FP-NEXT: sw a6, 80(a0) ; RV64I-WITH-FP-NEXT: sw s6, 76(a0) ; RV64I-WITH-FP-NEXT: sw s5, 72(a0) ; RV64I-WITH-FP-NEXT: sw s4, 68(a0) @@ -1064,29 +1064,29 @@ define void @callee() { ; RV64IZCMP-NEXT: lw s10, 68(a0) ; RV64IZCMP-NEXT: lw s11, 72(a0) ; RV64IZCMP-NEXT: lw ra, 76(a0) -; RV64IZCMP-NEXT: lw t2, 80(a0) -; RV64IZCMP-NEXT: lw s0, 84(a0) -; RV64IZCMP-NEXT: lw s1, 88(a0) -; RV64IZCMP-NEXT: lw t1, 92(a0) ; RV64IZCMP-NEXT: lw t0, 112(a0) -; RV64IZCMP-NEXT: lw a5, 116(a0) -; RV64IZCMP-NEXT: lw a3, 120(a0) -; RV64IZCMP-NEXT: lw a1, 124(a0) -; RV64IZCMP-NEXT: lw a7, 96(a0) +; RV64IZCMP-NEXT: lw s0, 116(a0) +; RV64IZCMP-NEXT: lw s1, 120(a0) +; RV64IZCMP-NEXT: lw a5, 124(a0) +; RV64IZCMP-NEXT: lw t1, 96(a0) ; RV64IZCMP-NEXT: lw a6, 100(a0) -; RV64IZCMP-NEXT: lw a4, 104(a0) -; RV64IZCMP-NEXT: lw a2, 108(a0) -; RV64IZCMP-NEXT: sw a1, 124(a0) -; RV64IZCMP-NEXT: sw a3, 120(a0) -; RV64IZCMP-NEXT: sw a5, 116(a0) +; RV64IZCMP-NEXT: lw a3, 104(a0) +; RV64IZCMP-NEXT: lw a1, 108(a0) +; RV64IZCMP-NEXT: lw t2, 80(a0) +; RV64IZCMP-NEXT: lw a7, 84(a0) +; RV64IZCMP-NEXT: lw a4, 88(a0) +; RV64IZCMP-NEXT: lw a2, 92(a0) +; RV64IZCMP-NEXT: sw a5, 124(a0) +; RV64IZCMP-NEXT: sw s1, 120(a0) +; RV64IZCMP-NEXT: sw s0, 116(a0) ; RV64IZCMP-NEXT: sw t0, 112(a0) -; RV64IZCMP-NEXT: sw a2, 108(a0) -; RV64IZCMP-NEXT: sw a4, 104(a0) +; RV64IZCMP-NEXT: sw a1, 108(a0) +; RV64IZCMP-NEXT: sw a3, 104(a0) ; RV64IZCMP-NEXT: sw a6, 100(a0) -; RV64IZCMP-NEXT: sw a7, 96(a0) -; RV64IZCMP-NEXT: sw t1, 92(a0) -; RV64IZCMP-NEXT: sw s1, 88(a0) -; RV64IZCMP-NEXT: sw s0, 84(a0) +; RV64IZCMP-NEXT: sw t1, 96(a0) +; RV64IZCMP-NEXT: sw a2, 92(a0) +; RV64IZCMP-NEXT: sw a4, 88(a0) +; RV64IZCMP-NEXT: sw a7, 84(a0) ; RV64IZCMP-NEXT: sw t2, 80(a0) ; RV64IZCMP-NEXT: sw ra, 76(a0) ; RV64IZCMP-NEXT: sw s11, 72(a0) @@ -1175,30 +1175,30 @@ define void @callee() { ; RV64IZCMP-WITH-FP-NEXT: lw s10, 68(a0) ; RV64IZCMP-WITH-FP-NEXT: lw s11, 72(a0) ; 
RV64IZCMP-WITH-FP-NEXT: lw ra, 76(a0) -; RV64IZCMP-WITH-FP-NEXT: lw s1, 80(a0) -; RV64IZCMP-WITH-FP-NEXT: lw t3, 84(a0) -; RV64IZCMP-WITH-FP-NEXT: lw t2, 88(a0) -; RV64IZCMP-WITH-FP-NEXT: lw t1, 92(a0) -; RV64IZCMP-WITH-FP-NEXT: lw t0, 112(a0) -; RV64IZCMP-WITH-FP-NEXT: lw a6, 116(a0) +; RV64IZCMP-WITH-FP-NEXT: lw s1, 112(a0) +; RV64IZCMP-WITH-FP-NEXT: lw t0, 116(a0) ; RV64IZCMP-WITH-FP-NEXT: lw a4, 120(a0) -; RV64IZCMP-WITH-FP-NEXT: lw a1, 124(a0) -; RV64IZCMP-WITH-FP-NEXT: lw a7, 96(a0) -; RV64IZCMP-WITH-FP-NEXT: lw a5, 100(a0) -; RV64IZCMP-WITH-FP-NEXT: lw a3, 104(a0) -; RV64IZCMP-WITH-FP-NEXT: lw a2, 108(a0) -; RV64IZCMP-WITH-FP-NEXT: sw a1, 124(a0) +; RV64IZCMP-WITH-FP-NEXT: lw a2, 124(a0) +; RV64IZCMP-WITH-FP-NEXT: lw t3, 96(a0) +; RV64IZCMP-WITH-FP-NEXT: lw t1, 100(a0) +; RV64IZCMP-WITH-FP-NEXT: lw a6, 104(a0) +; RV64IZCMP-WITH-FP-NEXT: lw a1, 108(a0) +; RV64IZCMP-WITH-FP-NEXT: lw t2, 80(a0) +; RV64IZCMP-WITH-FP-NEXT: lw a7, 84(a0) +; RV64IZCMP-WITH-FP-NEXT: lw a5, 88(a0) +; RV64IZCMP-WITH-FP-NEXT: lw a3, 92(a0) +; RV64IZCMP-WITH-FP-NEXT: sw a2, 124(a0) ; RV64IZCMP-WITH-FP-NEXT: sw a4, 120(a0) -; RV64IZCMP-WITH-FP-NEXT: sw a6, 116(a0) -; RV64IZCMP-WITH-FP-NEXT: sw t0, 112(a0) -; RV64IZCMP-WITH-FP-NEXT: sw a2, 108(a0) -; RV64IZCMP-WITH-FP-NEXT: sw a3, 104(a0) -; RV64IZCMP-WITH-FP-NEXT: sw a5, 100(a0) -; RV64IZCMP-WITH-FP-NEXT: sw a7, 96(a0) -; RV64IZCMP-WITH-FP-NEXT: sw t1, 92(a0) -; RV64IZCMP-WITH-FP-NEXT: sw t2, 88(a0) -; RV64IZCMP-WITH-FP-NEXT: sw t3, 84(a0) -; RV64IZCMP-WITH-FP-NEXT: sw s1, 80(a0) +; RV64IZCMP-WITH-FP-NEXT: sw t0, 116(a0) +; RV64IZCMP-WITH-FP-NEXT: sw s1, 112(a0) +; RV64IZCMP-WITH-FP-NEXT: sw a1, 108(a0) +; RV64IZCMP-WITH-FP-NEXT: sw a6, 104(a0) +; RV64IZCMP-WITH-FP-NEXT: sw t1, 100(a0) +; RV64IZCMP-WITH-FP-NEXT: sw t3, 96(a0) +; RV64IZCMP-WITH-FP-NEXT: sw a3, 92(a0) +; RV64IZCMP-WITH-FP-NEXT: sw a5, 88(a0) +; RV64IZCMP-WITH-FP-NEXT: sw a7, 84(a0) +; RV64IZCMP-WITH-FP-NEXT: sw t2, 80(a0) ; RV64IZCMP-WITH-FP-NEXT: sw ra, 76(a0) ; 
RV64IZCMP-WITH-FP-NEXT: sw s11, 72(a0) ; RV64IZCMP-WITH-FP-NEXT: sw s10, 68(a0) diff --git a/llvm/test/CodeGen/RISCV/calling-conv-half.ll b/llvm/test/CodeGen/RISCV/calling-conv-half.ll index d8e6b7f3ede9a..4c3fb428bc9cf 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-half.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-half.ll @@ -439,15 +439,15 @@ define i32 @caller_half_on_stack() nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lui a7, 5 +; RV32I-NEXT: lui a2, 5 ; RV32I-NEXT: li a0, 1 ; RV32I-NEXT: li a1, 2 +; RV32I-NEXT: addi t0, a2, -1792 ; RV32I-NEXT: li a2, 3 ; RV32I-NEXT: li a3, 4 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 6 ; RV32I-NEXT: li a6, 7 -; RV32I-NEXT: addi t0, a7, -1792 ; RV32I-NEXT: li a7, 8 ; RV32I-NEXT: sw t0, 0(sp) ; RV32I-NEXT: call callee_half_on_stack @@ -459,15 +459,15 @@ define i32 @caller_half_on_stack() nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a7, 5 +; RV64I-NEXT: lui a2, 5 ; RV64I-NEXT: li a0, 1 ; RV64I-NEXT: li a1, 2 +; RV64I-NEXT: addi t0, a2, -1792 ; RV64I-NEXT: li a2, 3 ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 5 ; RV64I-NEXT: li a5, 6 ; RV64I-NEXT: li a6, 7 -; RV64I-NEXT: addi t0, a7, -1792 ; RV64I-NEXT: li a7, 8 ; RV64I-NEXT: sd t0, 0(sp) ; RV64I-NEXT: call callee_half_on_stack @@ -479,15 +479,15 @@ define i32 @caller_half_on_stack() nounwind { ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: lui a7, 1048565 +; RV32IF-NEXT: lui a2, 1048565 ; RV32IF-NEXT: li a0, 1 ; RV32IF-NEXT: li a1, 2 +; RV32IF-NEXT: addi t0, a2, -1792 ; RV32IF-NEXT: li a2, 3 ; RV32IF-NEXT: li a3, 4 ; RV32IF-NEXT: li a4, 5 ; RV32IF-NEXT: li a5, 6 ; RV32IF-NEXT: li a6, 7 -; RV32IF-NEXT: addi t0, a7, -1792 ; RV32IF-NEXT: li a7, 8 ; RV32IF-NEXT: sw t0, 0(sp) ; RV32IF-NEXT: call callee_half_on_stack @@ -499,15 +499,15 @@ define i32 
@caller_half_on_stack() nounwind { ; RV64IF: # %bb.0: ; RV64IF-NEXT: addi sp, sp, -16 ; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IF-NEXT: lui a7, 1048565 +; RV64IF-NEXT: lui a2, 1048565 ; RV64IF-NEXT: li a0, 1 ; RV64IF-NEXT: li a1, 2 +; RV64IF-NEXT: addi t0, a2, -1792 ; RV64IF-NEXT: li a2, 3 ; RV64IF-NEXT: li a3, 4 ; RV64IF-NEXT: li a4, 5 ; RV64IF-NEXT: li a5, 6 ; RV64IF-NEXT: li a6, 7 -; RV64IF-NEXT: addi t0, a7, -1792 ; RV64IF-NEXT: li a7, 8 ; RV64IF-NEXT: sw t0, 0(sp) ; RV64IF-NEXT: call callee_half_on_stack @@ -519,7 +519,9 @@ define i32 @caller_half_on_stack() nounwind { ; RV32-ILP32F: # %bb.0: ; RV32-ILP32F-NEXT: addi sp, sp, -16 ; RV32-ILP32F-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-ILP32F-NEXT: lui a7, 1048565 +; RV32-ILP32F-NEXT: lui a0, 1048565 +; RV32-ILP32F-NEXT: addi a0, a0, -1792 +; RV32-ILP32F-NEXT: fmv.w.x fa0, a0 ; RV32-ILP32F-NEXT: li a0, 1 ; RV32-ILP32F-NEXT: li a1, 2 ; RV32-ILP32F-NEXT: li a2, 3 @@ -527,8 +529,6 @@ define i32 @caller_half_on_stack() nounwind { ; RV32-ILP32F-NEXT: li a4, 5 ; RV32-ILP32F-NEXT: li a5, 6 ; RV32-ILP32F-NEXT: li a6, 7 -; RV32-ILP32F-NEXT: addi a7, a7, -1792 -; RV32-ILP32F-NEXT: fmv.w.x fa0, a7 ; RV32-ILP32F-NEXT: li a7, 8 ; RV32-ILP32F-NEXT: call callee_half_on_stack ; RV32-ILP32F-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -539,7 +539,9 @@ define i32 @caller_half_on_stack() nounwind { ; RV64-LP64F: # %bb.0: ; RV64-LP64F-NEXT: addi sp, sp, -16 ; RV64-LP64F-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-LP64F-NEXT: lui a7, 1048565 +; RV64-LP64F-NEXT: lui a0, 1048565 +; RV64-LP64F-NEXT: addi a0, a0, -1792 +; RV64-LP64F-NEXT: fmv.w.x fa0, a0 ; RV64-LP64F-NEXT: li a0, 1 ; RV64-LP64F-NEXT: li a1, 2 ; RV64-LP64F-NEXT: li a2, 3 @@ -547,8 +549,6 @@ define i32 @caller_half_on_stack() nounwind { ; RV64-LP64F-NEXT: li a4, 5 ; RV64-LP64F-NEXT: li a5, 6 ; RV64-LP64F-NEXT: li a6, 7 -; RV64-LP64F-NEXT: addi a7, a7, -1792 -; RV64-LP64F-NEXT: fmv.w.x fa0, a7 ; RV64-LP64F-NEXT: li a7, 8 ; RV64-LP64F-NEXT: 
call callee_half_on_stack ; RV64-LP64F-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -559,7 +559,9 @@ define i32 @caller_half_on_stack() nounwind { ; RV32-ILP32ZFHMIN: # %bb.0: ; RV32-ILP32ZFHMIN-NEXT: addi sp, sp, -16 ; RV32-ILP32ZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-ILP32ZFHMIN-NEXT: lui a7, 5 +; RV32-ILP32ZFHMIN-NEXT: lui a0, 5 +; RV32-ILP32ZFHMIN-NEXT: addi a0, a0, -1792 +; RV32-ILP32ZFHMIN-NEXT: fmv.h.x fa0, a0 ; RV32-ILP32ZFHMIN-NEXT: li a0, 1 ; RV32-ILP32ZFHMIN-NEXT: li a1, 2 ; RV32-ILP32ZFHMIN-NEXT: li a2, 3 @@ -567,8 +569,6 @@ define i32 @caller_half_on_stack() nounwind { ; RV32-ILP32ZFHMIN-NEXT: li a4, 5 ; RV32-ILP32ZFHMIN-NEXT: li a5, 6 ; RV32-ILP32ZFHMIN-NEXT: li a6, 7 -; RV32-ILP32ZFHMIN-NEXT: addi a7, a7, -1792 -; RV32-ILP32ZFHMIN-NEXT: fmv.h.x fa0, a7 ; RV32-ILP32ZFHMIN-NEXT: li a7, 8 ; RV32-ILP32ZFHMIN-NEXT: call callee_half_on_stack ; RV32-ILP32ZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -579,7 +579,9 @@ define i32 @caller_half_on_stack() nounwind { ; RV64-LP64ZFHMIN: # %bb.0: ; RV64-LP64ZFHMIN-NEXT: addi sp, sp, -16 ; RV64-LP64ZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-LP64ZFHMIN-NEXT: lui a7, 5 +; RV64-LP64ZFHMIN-NEXT: lui a0, 5 +; RV64-LP64ZFHMIN-NEXT: addi a0, a0, -1792 +; RV64-LP64ZFHMIN-NEXT: fmv.h.x fa0, a0 ; RV64-LP64ZFHMIN-NEXT: li a0, 1 ; RV64-LP64ZFHMIN-NEXT: li a1, 2 ; RV64-LP64ZFHMIN-NEXT: li a2, 3 @@ -587,8 +589,6 @@ define i32 @caller_half_on_stack() nounwind { ; RV64-LP64ZFHMIN-NEXT: li a4, 5 ; RV64-LP64ZFHMIN-NEXT: li a5, 6 ; RV64-LP64ZFHMIN-NEXT: li a6, 7 -; RV64-LP64ZFHMIN-NEXT: addi a7, a7, -1792 -; RV64-LP64ZFHMIN-NEXT: fmv.h.x fa0, a7 ; RV64-LP64ZFHMIN-NEXT: li a7, 8 ; RV64-LP64ZFHMIN-NEXT: call callee_half_on_stack ; RV64-LP64ZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -599,7 +599,9 @@ define i32 @caller_half_on_stack() nounwind { ; RV32-ZFH-ILP32: # %bb.0: ; RV32-ZFH-ILP32-NEXT: addi sp, sp, -16 ; RV32-ZFH-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-ZFH-ILP32-NEXT: 
lui a7, 5 +; RV32-ZFH-ILP32-NEXT: lui a0, 5 +; RV32-ZFH-ILP32-NEXT: addi a0, a0, -1792 +; RV32-ZFH-ILP32-NEXT: fmv.h.x fa5, a0 ; RV32-ZFH-ILP32-NEXT: li a0, 1 ; RV32-ZFH-ILP32-NEXT: li a1, 2 ; RV32-ZFH-ILP32-NEXT: li a2, 3 @@ -607,8 +609,6 @@ define i32 @caller_half_on_stack() nounwind { ; RV32-ZFH-ILP32-NEXT: li a4, 5 ; RV32-ZFH-ILP32-NEXT: li a5, 6 ; RV32-ZFH-ILP32-NEXT: li a6, 7 -; RV32-ZFH-ILP32-NEXT: addi a7, a7, -1792 -; RV32-ZFH-ILP32-NEXT: fmv.h.x fa5, a7 ; RV32-ZFH-ILP32-NEXT: li a7, 8 ; RV32-ZFH-ILP32-NEXT: fsh fa5, 0(sp) ; RV32-ZFH-ILP32-NEXT: call callee_half_on_stack @@ -620,7 +620,9 @@ define i32 @caller_half_on_stack() nounwind { ; RV32-ZFH-ILP32F: # %bb.0: ; RV32-ZFH-ILP32F-NEXT: addi sp, sp, -16 ; RV32-ZFH-ILP32F-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-ZFH-ILP32F-NEXT: lui a7, 5 +; RV32-ZFH-ILP32F-NEXT: lui a0, 5 +; RV32-ZFH-ILP32F-NEXT: addi a0, a0, -1792 +; RV32-ZFH-ILP32F-NEXT: fmv.h.x fa0, a0 ; RV32-ZFH-ILP32F-NEXT: li a0, 1 ; RV32-ZFH-ILP32F-NEXT: li a1, 2 ; RV32-ZFH-ILP32F-NEXT: li a2, 3 @@ -628,8 +630,6 @@ define i32 @caller_half_on_stack() nounwind { ; RV32-ZFH-ILP32F-NEXT: li a4, 5 ; RV32-ZFH-ILP32F-NEXT: li a5, 6 ; RV32-ZFH-ILP32F-NEXT: li a6, 7 -; RV32-ZFH-ILP32F-NEXT: addi a7, a7, -1792 -; RV32-ZFH-ILP32F-NEXT: fmv.h.x fa0, a7 ; RV32-ZFH-ILP32F-NEXT: li a7, 8 ; RV32-ZFH-ILP32F-NEXT: call callee_half_on_stack ; RV32-ZFH-ILP32F-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -640,7 +640,9 @@ define i32 @caller_half_on_stack() nounwind { ; RV64-ZFH-LP64: # %bb.0: ; RV64-ZFH-LP64-NEXT: addi sp, sp, -16 ; RV64-ZFH-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-ZFH-LP64-NEXT: lui a7, 5 +; RV64-ZFH-LP64-NEXT: lui a0, 5 +; RV64-ZFH-LP64-NEXT: addi a0, a0, -1792 +; RV64-ZFH-LP64-NEXT: fmv.h.x fa5, a0 ; RV64-ZFH-LP64-NEXT: li a0, 1 ; RV64-ZFH-LP64-NEXT: li a1, 2 ; RV64-ZFH-LP64-NEXT: li a2, 3 @@ -648,8 +650,6 @@ define i32 @caller_half_on_stack() nounwind { ; RV64-ZFH-LP64-NEXT: li a4, 5 ; RV64-ZFH-LP64-NEXT: li a5, 6 ; 
RV64-ZFH-LP64-NEXT: li a6, 7 -; RV64-ZFH-LP64-NEXT: addi a7, a7, -1792 -; RV64-ZFH-LP64-NEXT: fmv.h.x fa5, a7 ; RV64-ZFH-LP64-NEXT: li a7, 8 ; RV64-ZFH-LP64-NEXT: fsh fa5, 0(sp) ; RV64-ZFH-LP64-NEXT: call callee_half_on_stack @@ -661,7 +661,9 @@ define i32 @caller_half_on_stack() nounwind { ; RV64-ZFH-LP64F: # %bb.0: ; RV64-ZFH-LP64F-NEXT: addi sp, sp, -16 ; RV64-ZFH-LP64F-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-ZFH-LP64F-NEXT: lui a7, 5 +; RV64-ZFH-LP64F-NEXT: lui a0, 5 +; RV64-ZFH-LP64F-NEXT: addi a0, a0, -1792 +; RV64-ZFH-LP64F-NEXT: fmv.h.x fa0, a0 ; RV64-ZFH-LP64F-NEXT: li a0, 1 ; RV64-ZFH-LP64F-NEXT: li a1, 2 ; RV64-ZFH-LP64F-NEXT: li a2, 3 @@ -669,8 +671,6 @@ define i32 @caller_half_on_stack() nounwind { ; RV64-ZFH-LP64F-NEXT: li a4, 5 ; RV64-ZFH-LP64F-NEXT: li a5, 6 ; RV64-ZFH-LP64F-NEXT: li a6, 7 -; RV64-ZFH-LP64F-NEXT: addi a7, a7, -1792 -; RV64-ZFH-LP64F-NEXT: fmv.h.x fa0, a7 ; RV64-ZFH-LP64F-NEXT: li a7, 8 ; RV64-ZFH-LP64F-NEXT: call callee_half_on_stack ; RV64-ZFH-LP64F-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -748,9 +748,9 @@ define i32 @callee_half_on_stack_exhausted_gprs_fprs(i32 %a, float %fa, i32 %b, ; RV32-ILP32F-NEXT: addi sp, sp, -16 ; RV32-ILP32F-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-ILP32F-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32-ILP32F-NEXT: mv s0, a7 ; RV32-ILP32F-NEXT: lhu a0, 16(sp) ; RV32-ILP32F-NEXT: fmv.w.x fa0, a0 +; RV32-ILP32F-NEXT: mv s0, a7 ; RV32-ILP32F-NEXT: call __extendhfsf2 ; RV32-ILP32F-NEXT: fcvt.w.s a0, fa0, rtz ; RV32-ILP32F-NEXT: add a0, s0, a0 @@ -794,9 +794,9 @@ define i32 @callee_half_on_stack_exhausted_gprs_fprs(i32 %a, float %fa, i32 %b, ; RV32-ZFH-ILP32-LABEL: callee_half_on_stack_exhausted_gprs_fprs: ; RV32-ZFH-ILP32: # %bb.0: ; RV32-ZFH-ILP32-NEXT: flh fa5, 32(sp) -; RV32-ZFH-ILP32-NEXT: lw a0, 24(sp) -; RV32-ZFH-ILP32-NEXT: fcvt.w.h a1, fa5, rtz -; RV32-ZFH-ILP32-NEXT: add a0, a0, a1 +; RV32-ZFH-ILP32-NEXT: fcvt.w.h a0, fa5, rtz +; RV32-ZFH-ILP32-NEXT: lw a1, 24(sp) +; 
RV32-ZFH-ILP32-NEXT: add a0, a1, a0 ; RV32-ZFH-ILP32-NEXT: ret ; ; RV32-ZFH-ILP32F-LABEL: callee_half_on_stack_exhausted_gprs_fprs: @@ -809,9 +809,9 @@ define i32 @callee_half_on_stack_exhausted_gprs_fprs(i32 %a, float %fa, i32 %b, ; RV64-ZFH-LP64-LABEL: callee_half_on_stack_exhausted_gprs_fprs: ; RV64-ZFH-LP64: # %bb.0: ; RV64-ZFH-LP64-NEXT: flh fa5, 64(sp) -; RV64-ZFH-LP64-NEXT: lw a0, 48(sp) -; RV64-ZFH-LP64-NEXT: fcvt.w.h a1, fa5, rtz -; RV64-ZFH-LP64-NEXT: addw a0, a0, a1 +; RV64-ZFH-LP64-NEXT: fcvt.w.h a0, fa5, rtz +; RV64-ZFH-LP64-NEXT: lw a1, 48(sp) +; RV64-ZFH-LP64-NEXT: addw a0, a1, a0 ; RV64-ZFH-LP64-NEXT: ret ; ; RV64-ZFH-LP64F-LABEL: callee_half_on_stack_exhausted_gprs_fprs: @@ -830,33 +830,33 @@ define i32 @caller_half_on_stack_exhausted_gprs_fprs() nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -48 ; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: lui a7, 5 -; RV32I-NEXT: lui a6, 266240 -; RV32I-NEXT: li t0, 8 -; RV32I-NEXT: lui t1, 265728 -; RV32I-NEXT: li t2, 7 -; RV32I-NEXT: lui t3, 265216 -; RV32I-NEXT: li t4, 6 -; RV32I-NEXT: lui t5, 264704 -; RV32I-NEXT: li t6, 5 +; RV32I-NEXT: lui a0, 5 +; RV32I-NEXT: lui a1, 266240 +; RV32I-NEXT: li a2, 8 +; RV32I-NEXT: addi a0, a0, -1792 +; RV32I-NEXT: lui a3, 265728 +; RV32I-NEXT: sw a0, 32(sp) +; RV32I-NEXT: li a0, 7 +; RV32I-NEXT: sw a0, 16(sp) +; RV32I-NEXT: sw a3, 20(sp) +; RV32I-NEXT: sw a2, 24(sp) +; RV32I-NEXT: sw a1, 28(sp) +; RV32I-NEXT: lui t0, 265216 +; RV32I-NEXT: li t1, 6 +; RV32I-NEXT: lui t2, 264704 +; RV32I-NEXT: li t3, 5 ; RV32I-NEXT: li a0, 1 ; RV32I-NEXT: lui a1, 260096 ; RV32I-NEXT: li a2, 2 ; RV32I-NEXT: lui a3, 262144 ; RV32I-NEXT: li a4, 3 ; RV32I-NEXT: lui a5, 263168 -; RV32I-NEXT: sw t2, 16(sp) -; RV32I-NEXT: sw t1, 20(sp) -; RV32I-NEXT: sw t0, 24(sp) -; RV32I-NEXT: sw a6, 28(sp) ; RV32I-NEXT: li a6, 4 -; RV32I-NEXT: addi a7, a7, -1792 -; RV32I-NEXT: sw a7, 32(sp) ; RV32I-NEXT: lui a7, 264192 -; RV32I-NEXT: sw t6, 0(sp) -; RV32I-NEXT: sw t5, 4(sp) -; 
RV32I-NEXT: sw t4, 8(sp) -; RV32I-NEXT: sw t3, 12(sp) +; RV32I-NEXT: sw t3, 0(sp) +; RV32I-NEXT: sw t2, 4(sp) +; RV32I-NEXT: sw t1, 8(sp) +; RV32I-NEXT: sw t0, 12(sp) ; RV32I-NEXT: call callee_half_on_stack ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 48 @@ -866,33 +866,33 @@ define i32 @caller_half_on_stack_exhausted_gprs_fprs() nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -80 ; RV64I-NEXT: sd ra, 72(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a7, 5 -; RV64I-NEXT: lui a6, 266240 -; RV64I-NEXT: li t0, 8 -; RV64I-NEXT: lui t1, 265728 -; RV64I-NEXT: li t2, 7 -; RV64I-NEXT: lui t3, 265216 -; RV64I-NEXT: li t4, 6 -; RV64I-NEXT: lui t5, 264704 -; RV64I-NEXT: li t6, 5 +; RV64I-NEXT: lui a0, 5 +; RV64I-NEXT: lui a1, 266240 +; RV64I-NEXT: li a2, 8 +; RV64I-NEXT: addi a0, a0, -1792 +; RV64I-NEXT: lui a3, 265728 +; RV64I-NEXT: sd a0, 64(sp) +; RV64I-NEXT: li a0, 7 +; RV64I-NEXT: sd a0, 32(sp) +; RV64I-NEXT: sd a3, 40(sp) +; RV64I-NEXT: sd a2, 48(sp) +; RV64I-NEXT: sd a1, 56(sp) +; RV64I-NEXT: lui t0, 265216 +; RV64I-NEXT: li t1, 6 +; RV64I-NEXT: lui t2, 264704 +; RV64I-NEXT: li t3, 5 ; RV64I-NEXT: li a0, 1 ; RV64I-NEXT: lui a1, 260096 ; RV64I-NEXT: li a2, 2 ; RV64I-NEXT: lui a3, 262144 ; RV64I-NEXT: li a4, 3 ; RV64I-NEXT: lui a5, 263168 -; RV64I-NEXT: sd t2, 32(sp) -; RV64I-NEXT: sd t1, 40(sp) -; RV64I-NEXT: sd t0, 48(sp) -; RV64I-NEXT: sd a6, 56(sp) ; RV64I-NEXT: li a6, 4 -; RV64I-NEXT: addi a7, a7, -1792 -; RV64I-NEXT: sd a7, 64(sp) ; RV64I-NEXT: lui a7, 264192 -; RV64I-NEXT: sd t6, 0(sp) -; RV64I-NEXT: sd t5, 8(sp) -; RV64I-NEXT: sd t4, 16(sp) -; RV64I-NEXT: sd t3, 24(sp) +; RV64I-NEXT: sd t3, 0(sp) +; RV64I-NEXT: sd t2, 8(sp) +; RV64I-NEXT: sd t1, 16(sp) +; RV64I-NEXT: sd t0, 24(sp) ; RV64I-NEXT: call callee_half_on_stack ; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 80 @@ -902,33 +902,33 @@ define i32 @caller_half_on_stack_exhausted_gprs_fprs() nounwind { ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi 
sp, sp, -48 ; RV32IF-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32IF-NEXT: lui a7, 1048565 -; RV32IF-NEXT: lui a6, 266240 -; RV32IF-NEXT: li t0, 8 -; RV32IF-NEXT: lui t1, 265728 -; RV32IF-NEXT: li t2, 7 -; RV32IF-NEXT: lui t3, 265216 -; RV32IF-NEXT: li t4, 6 -; RV32IF-NEXT: lui t5, 264704 -; RV32IF-NEXT: li t6, 5 +; RV32IF-NEXT: lui a0, 1048565 +; RV32IF-NEXT: lui a1, 266240 +; RV32IF-NEXT: li a2, 8 +; RV32IF-NEXT: addi a0, a0, -1792 +; RV32IF-NEXT: lui a3, 265728 +; RV32IF-NEXT: sw a0, 32(sp) +; RV32IF-NEXT: li a0, 7 +; RV32IF-NEXT: sw a0, 16(sp) +; RV32IF-NEXT: sw a3, 20(sp) +; RV32IF-NEXT: sw a2, 24(sp) +; RV32IF-NEXT: sw a1, 28(sp) +; RV32IF-NEXT: lui t0, 265216 +; RV32IF-NEXT: li t1, 6 +; RV32IF-NEXT: lui t2, 264704 +; RV32IF-NEXT: li t3, 5 ; RV32IF-NEXT: li a0, 1 ; RV32IF-NEXT: lui a1, 260096 ; RV32IF-NEXT: li a2, 2 ; RV32IF-NEXT: lui a3, 262144 ; RV32IF-NEXT: li a4, 3 ; RV32IF-NEXT: lui a5, 263168 -; RV32IF-NEXT: sw t2, 16(sp) -; RV32IF-NEXT: sw t1, 20(sp) -; RV32IF-NEXT: sw t0, 24(sp) -; RV32IF-NEXT: sw a6, 28(sp) ; RV32IF-NEXT: li a6, 4 -; RV32IF-NEXT: addi a7, a7, -1792 -; RV32IF-NEXT: sw a7, 32(sp) ; RV32IF-NEXT: lui a7, 264192 -; RV32IF-NEXT: sw t6, 0(sp) -; RV32IF-NEXT: sw t5, 4(sp) -; RV32IF-NEXT: sw t4, 8(sp) -; RV32IF-NEXT: sw t3, 12(sp) +; RV32IF-NEXT: sw t3, 0(sp) +; RV32IF-NEXT: sw t2, 4(sp) +; RV32IF-NEXT: sw t1, 8(sp) +; RV32IF-NEXT: sw t0, 12(sp) ; RV32IF-NEXT: call callee_half_on_stack ; RV32IF-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 48 @@ -938,33 +938,33 @@ define i32 @caller_half_on_stack_exhausted_gprs_fprs() nounwind { ; RV64IF: # %bb.0: ; RV64IF-NEXT: addi sp, sp, -80 ; RV64IF-NEXT: sd ra, 72(sp) # 8-byte Folded Spill -; RV64IF-NEXT: lui a7, 1048565 -; RV64IF-NEXT: lui a6, 266240 -; RV64IF-NEXT: li t0, 8 -; RV64IF-NEXT: lui t1, 265728 -; RV64IF-NEXT: li t2, 7 -; RV64IF-NEXT: lui t3, 265216 -; RV64IF-NEXT: li t4, 6 -; RV64IF-NEXT: lui t5, 264704 -; RV64IF-NEXT: li t6, 5 +; RV64IF-NEXT: lui a0, 1048565 +; 
RV64IF-NEXT: lui a1, 266240 +; RV64IF-NEXT: li a2, 8 +; RV64IF-NEXT: addi a0, a0, -1792 +; RV64IF-NEXT: lui a3, 265728 +; RV64IF-NEXT: sw a0, 64(sp) +; RV64IF-NEXT: li a0, 7 +; RV64IF-NEXT: sd a0, 32(sp) +; RV64IF-NEXT: sw a3, 40(sp) +; RV64IF-NEXT: sd a2, 48(sp) +; RV64IF-NEXT: sw a1, 56(sp) +; RV64IF-NEXT: lui t0, 265216 +; RV64IF-NEXT: li t1, 6 +; RV64IF-NEXT: lui t2, 264704 +; RV64IF-NEXT: li t3, 5 ; RV64IF-NEXT: li a0, 1 ; RV64IF-NEXT: lui a1, 260096 ; RV64IF-NEXT: li a2, 2 ; RV64IF-NEXT: lui a3, 262144 ; RV64IF-NEXT: li a4, 3 ; RV64IF-NEXT: lui a5, 263168 -; RV64IF-NEXT: sd t2, 32(sp) -; RV64IF-NEXT: sw t1, 40(sp) -; RV64IF-NEXT: sd t0, 48(sp) -; RV64IF-NEXT: sw a6, 56(sp) ; RV64IF-NEXT: li a6, 4 -; RV64IF-NEXT: addi a7, a7, -1792 -; RV64IF-NEXT: sw a7, 64(sp) ; RV64IF-NEXT: lui a7, 264192 -; RV64IF-NEXT: sd t6, 0(sp) -; RV64IF-NEXT: sw t5, 8(sp) -; RV64IF-NEXT: sd t4, 16(sp) -; RV64IF-NEXT: sw t3, 24(sp) +; RV64IF-NEXT: sd t3, 0(sp) +; RV64IF-NEXT: sw t2, 8(sp) +; RV64IF-NEXT: sd t1, 16(sp) +; RV64IF-NEXT: sw t0, 24(sp) ; RV64IF-NEXT: call callee_half_on_stack ; RV64IF-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; RV64IF-NEXT: addi sp, sp, 80 @@ -974,33 +974,33 @@ define i32 @caller_half_on_stack_exhausted_gprs_fprs() nounwind { ; RV32-ILP32F: # %bb.0: ; RV32-ILP32F-NEXT: addi sp, sp, -16 ; RV32-ILP32F-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-ILP32F-NEXT: lui a7, 1048565 -; RV32-ILP32F-NEXT: lui a0, 260096 -; RV32-ILP32F-NEXT: lui a1, 262144 -; RV32-ILP32F-NEXT: lui a2, 263168 -; RV32-ILP32F-NEXT: lui a3, 264192 -; RV32-ILP32F-NEXT: lui a4, 264704 -; RV32-ILP32F-NEXT: lui a5, 265216 -; RV32-ILP32F-NEXT: lui a6, 265728 -; RV32-ILP32F-NEXT: lui t0, 266240 -; RV32-ILP32F-NEXT: fmv.w.x fa0, a0 +; RV32-ILP32F-NEXT: lui a0, 1048565 +; RV32-ILP32F-NEXT: lui a1, 260096 +; RV32-ILP32F-NEXT: lui a2, 262144 +; RV32-ILP32F-NEXT: addi t0, a0, -1792 +; RV32-ILP32F-NEXT: fmv.w.x fa0, a1 +; RV32-ILP32F-NEXT: fmv.w.x fa1, a2 +; RV32-ILP32F-NEXT: lui a0, 263168 +; 
RV32-ILP32F-NEXT: lui a1, 264192 +; RV32-ILP32F-NEXT: fmv.w.x fa2, a0 +; RV32-ILP32F-NEXT: fmv.w.x fa3, a1 +; RV32-ILP32F-NEXT: lui a0, 264704 +; RV32-ILP32F-NEXT: lui a1, 265216 +; RV32-ILP32F-NEXT: fmv.w.x fa4, a0 +; RV32-ILP32F-NEXT: fmv.w.x fa5, a1 +; RV32-ILP32F-NEXT: lui a0, 265728 +; RV32-ILP32F-NEXT: lui a1, 266240 +; RV32-ILP32F-NEXT: fmv.w.x fa6, a0 +; RV32-ILP32F-NEXT: fmv.w.x fa7, a1 ; RV32-ILP32F-NEXT: li a0, 1 -; RV32-ILP32F-NEXT: fmv.w.x fa1, a1 ; RV32-ILP32F-NEXT: li a1, 2 -; RV32-ILP32F-NEXT: fmv.w.x fa2, a2 ; RV32-ILP32F-NEXT: li a2, 3 -; RV32-ILP32F-NEXT: fmv.w.x fa3, a3 ; RV32-ILP32F-NEXT: li a3, 4 -; RV32-ILP32F-NEXT: fmv.w.x fa4, a4 ; RV32-ILP32F-NEXT: li a4, 5 -; RV32-ILP32F-NEXT: fmv.w.x fa5, a5 ; RV32-ILP32F-NEXT: li a5, 6 -; RV32-ILP32F-NEXT: fmv.w.x fa6, a6 ; RV32-ILP32F-NEXT: li a6, 7 -; RV32-ILP32F-NEXT: addi t1, a7, -1792 -; RV32-ILP32F-NEXT: fmv.w.x fa7, t0 ; RV32-ILP32F-NEXT: li a7, 8 -; RV32-ILP32F-NEXT: sw t1, 0(sp) +; RV32-ILP32F-NEXT: sw t0, 0(sp) ; RV32-ILP32F-NEXT: call callee_half_on_stack ; RV32-ILP32F-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-ILP32F-NEXT: addi sp, sp, 16 @@ -1010,33 +1010,33 @@ define i32 @caller_half_on_stack_exhausted_gprs_fprs() nounwind { ; RV64-LP64F: # %bb.0: ; RV64-LP64F-NEXT: addi sp, sp, -16 ; RV64-LP64F-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-LP64F-NEXT: lui a7, 1048565 -; RV64-LP64F-NEXT: lui a0, 260096 -; RV64-LP64F-NEXT: lui a1, 262144 -; RV64-LP64F-NEXT: lui a2, 263168 -; RV64-LP64F-NEXT: lui a3, 264192 -; RV64-LP64F-NEXT: lui a4, 264704 -; RV64-LP64F-NEXT: lui a5, 265216 -; RV64-LP64F-NEXT: lui a6, 265728 -; RV64-LP64F-NEXT: lui t0, 266240 -; RV64-LP64F-NEXT: fmv.w.x fa0, a0 +; RV64-LP64F-NEXT: lui a0, 1048565 +; RV64-LP64F-NEXT: lui a1, 260096 +; RV64-LP64F-NEXT: lui a2, 262144 +; RV64-LP64F-NEXT: addi t0, a0, -1792 +; RV64-LP64F-NEXT: fmv.w.x fa0, a1 +; RV64-LP64F-NEXT: fmv.w.x fa1, a2 +; RV64-LP64F-NEXT: lui a0, 263168 +; RV64-LP64F-NEXT: lui a1, 264192 +; RV64-LP64F-NEXT: 
fmv.w.x fa2, a0 +; RV64-LP64F-NEXT: fmv.w.x fa3, a1 +; RV64-LP64F-NEXT: lui a0, 264704 +; RV64-LP64F-NEXT: lui a1, 265216 +; RV64-LP64F-NEXT: fmv.w.x fa4, a0 +; RV64-LP64F-NEXT: fmv.w.x fa5, a1 +; RV64-LP64F-NEXT: lui a0, 265728 +; RV64-LP64F-NEXT: lui a1, 266240 +; RV64-LP64F-NEXT: fmv.w.x fa6, a0 +; RV64-LP64F-NEXT: fmv.w.x fa7, a1 ; RV64-LP64F-NEXT: li a0, 1 -; RV64-LP64F-NEXT: fmv.w.x fa1, a1 ; RV64-LP64F-NEXT: li a1, 2 -; RV64-LP64F-NEXT: fmv.w.x fa2, a2 ; RV64-LP64F-NEXT: li a2, 3 -; RV64-LP64F-NEXT: fmv.w.x fa3, a3 ; RV64-LP64F-NEXT: li a3, 4 -; RV64-LP64F-NEXT: fmv.w.x fa4, a4 ; RV64-LP64F-NEXT: li a4, 5 -; RV64-LP64F-NEXT: fmv.w.x fa5, a5 ; RV64-LP64F-NEXT: li a5, 6 -; RV64-LP64F-NEXT: fmv.w.x fa6, a6 ; RV64-LP64F-NEXT: li a6, 7 -; RV64-LP64F-NEXT: addi t1, a7, -1792 -; RV64-LP64F-NEXT: fmv.w.x fa7, t0 ; RV64-LP64F-NEXT: li a7, 8 -; RV64-LP64F-NEXT: sw t1, 0(sp) +; RV64-LP64F-NEXT: sw t0, 0(sp) ; RV64-LP64F-NEXT: call callee_half_on_stack ; RV64-LP64F-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-LP64F-NEXT: addi sp, sp, 16 @@ -1046,32 +1046,32 @@ define i32 @caller_half_on_stack_exhausted_gprs_fprs() nounwind { ; RV32-ILP32ZFHMIN: # %bb.0: ; RV32-ILP32ZFHMIN-NEXT: addi sp, sp, -16 ; RV32-ILP32ZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-ILP32ZFHMIN-NEXT: lui a7, 5 -; RV32-ILP32ZFHMIN-NEXT: lui a0, 260096 +; RV32-ILP32ZFHMIN-NEXT: lui a0, 5 +; RV32-ILP32ZFHMIN-NEXT: lui a1, 260096 +; RV32-ILP32ZFHMIN-NEXT: addi a0, a0, -1792 +; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa0, a1 ; RV32-ILP32ZFHMIN-NEXT: lui a1, 262144 -; RV32-ILP32ZFHMIN-NEXT: lui a2, 263168 -; RV32-ILP32ZFHMIN-NEXT: lui a3, 264192 -; RV32-ILP32ZFHMIN-NEXT: lui a4, 264704 -; RV32-ILP32ZFHMIN-NEXT: lui a5, 265216 -; RV32-ILP32ZFHMIN-NEXT: lui a6, 265728 -; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ILP32ZFHMIN-NEXT: lui t0, 266240 +; RV32-ILP32ZFHMIN-NEXT: fmv.h.x ft0, a0 ; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa1, a1 +; RV32-ILP32ZFHMIN-NEXT: lui a0, 263168 +; RV32-ILP32ZFHMIN-NEXT: lui 
a1, 264192 +; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa2, a0 +; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa3, a1 +; RV32-ILP32ZFHMIN-NEXT: lui a0, 264704 +; RV32-ILP32ZFHMIN-NEXT: lui a1, 265216 +; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa4, a0 +; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa5, a1 +; RV32-ILP32ZFHMIN-NEXT: lui a0, 265728 +; RV32-ILP32ZFHMIN-NEXT: lui a1, 266240 +; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa6, a0 +; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa7, a1 ; RV32-ILP32ZFHMIN-NEXT: li a0, 1 -; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa2, a2 ; RV32-ILP32ZFHMIN-NEXT: li a1, 2 -; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa3, a3 ; RV32-ILP32ZFHMIN-NEXT: li a2, 3 -; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa4, a4 ; RV32-ILP32ZFHMIN-NEXT: li a3, 4 -; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa5, a5 ; RV32-ILP32ZFHMIN-NEXT: li a4, 5 -; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa6, a6 ; RV32-ILP32ZFHMIN-NEXT: li a5, 6 -; RV32-ILP32ZFHMIN-NEXT: fmv.w.x fa7, t0 ; RV32-ILP32ZFHMIN-NEXT: li a6, 7 -; RV32-ILP32ZFHMIN-NEXT: addi a7, a7, -1792 -; RV32-ILP32ZFHMIN-NEXT: fmv.h.x ft0, a7 ; RV32-ILP32ZFHMIN-NEXT: li a7, 8 ; RV32-ILP32ZFHMIN-NEXT: fsh ft0, 0(sp) ; RV32-ILP32ZFHMIN-NEXT: call callee_half_on_stack @@ -1083,32 +1083,32 @@ define i32 @caller_half_on_stack_exhausted_gprs_fprs() nounwind { ; RV64-LP64ZFHMIN: # %bb.0: ; RV64-LP64ZFHMIN-NEXT: addi sp, sp, -16 ; RV64-LP64ZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-LP64ZFHMIN-NEXT: lui a7, 5 -; RV64-LP64ZFHMIN-NEXT: lui a0, 260096 +; RV64-LP64ZFHMIN-NEXT: lui a0, 5 +; RV64-LP64ZFHMIN-NEXT: lui a1, 260096 +; RV64-LP64ZFHMIN-NEXT: addi a0, a0, -1792 +; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa0, a1 ; RV64-LP64ZFHMIN-NEXT: lui a1, 262144 -; RV64-LP64ZFHMIN-NEXT: lui a2, 263168 -; RV64-LP64ZFHMIN-NEXT: lui a3, 264192 -; RV64-LP64ZFHMIN-NEXT: lui a4, 264704 -; RV64-LP64ZFHMIN-NEXT: lui a5, 265216 -; RV64-LP64ZFHMIN-NEXT: lui a6, 265728 -; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-LP64ZFHMIN-NEXT: lui t0, 266240 +; RV64-LP64ZFHMIN-NEXT: fmv.h.x ft0, a0 ; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa1, a1 +; 
RV64-LP64ZFHMIN-NEXT: lui a0, 263168 +; RV64-LP64ZFHMIN-NEXT: lui a1, 264192 +; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa2, a0 +; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa3, a1 +; RV64-LP64ZFHMIN-NEXT: lui a0, 264704 +; RV64-LP64ZFHMIN-NEXT: lui a1, 265216 +; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa4, a0 +; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa5, a1 +; RV64-LP64ZFHMIN-NEXT: lui a0, 265728 +; RV64-LP64ZFHMIN-NEXT: lui a1, 266240 +; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa6, a0 +; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa7, a1 ; RV64-LP64ZFHMIN-NEXT: li a0, 1 -; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa2, a2 ; RV64-LP64ZFHMIN-NEXT: li a1, 2 -; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa3, a3 ; RV64-LP64ZFHMIN-NEXT: li a2, 3 -; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa4, a4 ; RV64-LP64ZFHMIN-NEXT: li a3, 4 -; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa5, a5 ; RV64-LP64ZFHMIN-NEXT: li a4, 5 -; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa6, a6 ; RV64-LP64ZFHMIN-NEXT: li a5, 6 -; RV64-LP64ZFHMIN-NEXT: fmv.w.x fa7, t0 ; RV64-LP64ZFHMIN-NEXT: li a6, 7 -; RV64-LP64ZFHMIN-NEXT: addi a7, a7, -1792 -; RV64-LP64ZFHMIN-NEXT: fmv.h.x ft0, a7 ; RV64-LP64ZFHMIN-NEXT: li a7, 8 ; RV64-LP64ZFHMIN-NEXT: fsh ft0, 0(sp) ; RV64-LP64ZFHMIN-NEXT: call callee_half_on_stack @@ -1120,33 +1120,33 @@ define i32 @caller_half_on_stack_exhausted_gprs_fprs() nounwind { ; RV32-ZFH-ILP32: # %bb.0: ; RV32-ZFH-ILP32-NEXT: addi sp, sp, -48 ; RV32-ZFH-ILP32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32-ZFH-ILP32-NEXT: lui a5, 266240 -; RV32-ZFH-ILP32-NEXT: li a6, 8 -; RV32-ZFH-ILP32-NEXT: lui a7, 265728 -; RV32-ZFH-ILP32-NEXT: li t0, 7 -; RV32-ZFH-ILP32-NEXT: lui t1, 265216 -; RV32-ZFH-ILP32-NEXT: li t2, 6 -; RV32-ZFH-ILP32-NEXT: lui t3, 264704 -; RV32-ZFH-ILP32-NEXT: li t4, 5 -; RV32-ZFH-ILP32-NEXT: lui t5, 5 +; RV32-ZFH-ILP32-NEXT: lui a0, 266240 +; RV32-ZFH-ILP32-NEXT: li a1, 8 +; RV32-ZFH-ILP32-NEXT: lui a2, 265728 +; RV32-ZFH-ILP32-NEXT: li a3, 7 +; RV32-ZFH-ILP32-NEXT: sw a3, 16(sp) +; RV32-ZFH-ILP32-NEXT: sw a2, 20(sp) +; RV32-ZFH-ILP32-NEXT: sw a1, 24(sp) +; RV32-ZFH-ILP32-NEXT: sw a0, 
28(sp) +; RV32-ZFH-ILP32-NEXT: lui t0, 265216 +; RV32-ZFH-ILP32-NEXT: li t1, 6 +; RV32-ZFH-ILP32-NEXT: lui t2, 264704 +; RV32-ZFH-ILP32-NEXT: lui a0, 5 +; RV32-ZFH-ILP32-NEXT: li t3, 5 +; RV32-ZFH-ILP32-NEXT: addi a0, a0, -1792 +; RV32-ZFH-ILP32-NEXT: fmv.h.x fa5, a0 ; RV32-ZFH-ILP32-NEXT: li a0, 1 ; RV32-ZFH-ILP32-NEXT: lui a1, 260096 ; RV32-ZFH-ILP32-NEXT: li a2, 2 ; RV32-ZFH-ILP32-NEXT: lui a3, 262144 ; RV32-ZFH-ILP32-NEXT: li a4, 3 -; RV32-ZFH-ILP32-NEXT: sw t0, 16(sp) -; RV32-ZFH-ILP32-NEXT: sw a7, 20(sp) -; RV32-ZFH-ILP32-NEXT: sw a6, 24(sp) -; RV32-ZFH-ILP32-NEXT: sw a5, 28(sp) ; RV32-ZFH-ILP32-NEXT: lui a5, 263168 -; RV32-ZFH-ILP32-NEXT: sw t4, 0(sp) -; RV32-ZFH-ILP32-NEXT: sw t3, 4(sp) -; RV32-ZFH-ILP32-NEXT: sw t2, 8(sp) -; RV32-ZFH-ILP32-NEXT: sw t1, 12(sp) ; RV32-ZFH-ILP32-NEXT: li a6, 4 -; RV32-ZFH-ILP32-NEXT: addi a7, t5, -1792 -; RV32-ZFH-ILP32-NEXT: fmv.h.x fa5, a7 ; RV32-ZFH-ILP32-NEXT: lui a7, 264192 +; RV32-ZFH-ILP32-NEXT: sw t3, 0(sp) +; RV32-ZFH-ILP32-NEXT: sw t2, 4(sp) +; RV32-ZFH-ILP32-NEXT: sw t1, 8(sp) +; RV32-ZFH-ILP32-NEXT: sw t0, 12(sp) ; RV32-ZFH-ILP32-NEXT: fsh fa5, 32(sp) ; RV32-ZFH-ILP32-NEXT: call callee_half_on_stack ; RV32-ZFH-ILP32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload @@ -1157,32 +1157,32 @@ define i32 @caller_half_on_stack_exhausted_gprs_fprs() nounwind { ; RV32-ZFH-ILP32F: # %bb.0: ; RV32-ZFH-ILP32F-NEXT: addi sp, sp, -16 ; RV32-ZFH-ILP32F-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-ZFH-ILP32F-NEXT: lui a7, 5 -; RV32-ZFH-ILP32F-NEXT: lui a0, 260096 +; RV32-ZFH-ILP32F-NEXT: lui a0, 5 +; RV32-ZFH-ILP32F-NEXT: lui a1, 260096 +; RV32-ZFH-ILP32F-NEXT: addi a0, a0, -1792 +; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa0, a1 ; RV32-ZFH-ILP32F-NEXT: lui a1, 262144 -; RV32-ZFH-ILP32F-NEXT: lui a2, 263168 -; RV32-ZFH-ILP32F-NEXT: lui a3, 264192 -; RV32-ZFH-ILP32F-NEXT: lui a4, 264704 -; RV32-ZFH-ILP32F-NEXT: lui a5, 265216 -; RV32-ZFH-ILP32F-NEXT: lui a6, 265728 -; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa0, a0 -; RV32-ZFH-ILP32F-NEXT: lui t0, 
266240 +; RV32-ZFH-ILP32F-NEXT: fmv.h.x ft0, a0 ; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa1, a1 +; RV32-ZFH-ILP32F-NEXT: lui a0, 263168 +; RV32-ZFH-ILP32F-NEXT: lui a1, 264192 +; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa2, a0 +; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa3, a1 +; RV32-ZFH-ILP32F-NEXT: lui a0, 264704 +; RV32-ZFH-ILP32F-NEXT: lui a1, 265216 +; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa4, a0 +; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa5, a1 +; RV32-ZFH-ILP32F-NEXT: lui a0, 265728 +; RV32-ZFH-ILP32F-NEXT: lui a1, 266240 +; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa6, a0 +; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa7, a1 ; RV32-ZFH-ILP32F-NEXT: li a0, 1 -; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa2, a2 ; RV32-ZFH-ILP32F-NEXT: li a1, 2 -; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa3, a3 ; RV32-ZFH-ILP32F-NEXT: li a2, 3 -; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa4, a4 ; RV32-ZFH-ILP32F-NEXT: li a3, 4 -; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa5, a5 ; RV32-ZFH-ILP32F-NEXT: li a4, 5 -; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa6, a6 ; RV32-ZFH-ILP32F-NEXT: li a5, 6 -; RV32-ZFH-ILP32F-NEXT: fmv.w.x fa7, t0 ; RV32-ZFH-ILP32F-NEXT: li a6, 7 -; RV32-ZFH-ILP32F-NEXT: addi a7, a7, -1792 -; RV32-ZFH-ILP32F-NEXT: fmv.h.x ft0, a7 ; RV32-ZFH-ILP32F-NEXT: li a7, 8 ; RV32-ZFH-ILP32F-NEXT: fsh ft0, 0(sp) ; RV32-ZFH-ILP32F-NEXT: call callee_half_on_stack @@ -1194,33 +1194,33 @@ define i32 @caller_half_on_stack_exhausted_gprs_fprs() nounwind { ; RV64-ZFH-LP64: # %bb.0: ; RV64-ZFH-LP64-NEXT: addi sp, sp, -80 ; RV64-ZFH-LP64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill -; RV64-ZFH-LP64-NEXT: lui a5, 266240 -; RV64-ZFH-LP64-NEXT: li a6, 8 -; RV64-ZFH-LP64-NEXT: lui a7, 265728 -; RV64-ZFH-LP64-NEXT: li t0, 7 -; RV64-ZFH-LP64-NEXT: lui t1, 265216 -; RV64-ZFH-LP64-NEXT: li t2, 6 -; RV64-ZFH-LP64-NEXT: lui t3, 264704 -; RV64-ZFH-LP64-NEXT: li t4, 5 -; RV64-ZFH-LP64-NEXT: lui t5, 5 +; RV64-ZFH-LP64-NEXT: lui a0, 266240 +; RV64-ZFH-LP64-NEXT: li a1, 8 +; RV64-ZFH-LP64-NEXT: lui a2, 265728 +; RV64-ZFH-LP64-NEXT: li a3, 7 +; RV64-ZFH-LP64-NEXT: sd a3, 32(sp) +; RV64-ZFH-LP64-NEXT: sw a2, 40(sp) 
+; RV64-ZFH-LP64-NEXT: sd a1, 48(sp) +; RV64-ZFH-LP64-NEXT: sw a0, 56(sp) +; RV64-ZFH-LP64-NEXT: lui t0, 265216 +; RV64-ZFH-LP64-NEXT: li t1, 6 +; RV64-ZFH-LP64-NEXT: lui t2, 264704 +; RV64-ZFH-LP64-NEXT: lui a0, 5 +; RV64-ZFH-LP64-NEXT: li t3, 5 +; RV64-ZFH-LP64-NEXT: addi a0, a0, -1792 +; RV64-ZFH-LP64-NEXT: fmv.h.x fa5, a0 ; RV64-ZFH-LP64-NEXT: li a0, 1 ; RV64-ZFH-LP64-NEXT: lui a1, 260096 ; RV64-ZFH-LP64-NEXT: li a2, 2 ; RV64-ZFH-LP64-NEXT: lui a3, 262144 ; RV64-ZFH-LP64-NEXT: li a4, 3 -; RV64-ZFH-LP64-NEXT: sd t0, 32(sp) -; RV64-ZFH-LP64-NEXT: sw a7, 40(sp) -; RV64-ZFH-LP64-NEXT: sd a6, 48(sp) -; RV64-ZFH-LP64-NEXT: sw a5, 56(sp) ; RV64-ZFH-LP64-NEXT: lui a5, 263168 -; RV64-ZFH-LP64-NEXT: sd t4, 0(sp) -; RV64-ZFH-LP64-NEXT: sw t3, 8(sp) -; RV64-ZFH-LP64-NEXT: sd t2, 16(sp) -; RV64-ZFH-LP64-NEXT: sw t1, 24(sp) ; RV64-ZFH-LP64-NEXT: li a6, 4 -; RV64-ZFH-LP64-NEXT: addi a7, t5, -1792 -; RV64-ZFH-LP64-NEXT: fmv.h.x fa5, a7 ; RV64-ZFH-LP64-NEXT: lui a7, 264192 +; RV64-ZFH-LP64-NEXT: sd t3, 0(sp) +; RV64-ZFH-LP64-NEXT: sw t2, 8(sp) +; RV64-ZFH-LP64-NEXT: sd t1, 16(sp) +; RV64-ZFH-LP64-NEXT: sw t0, 24(sp) ; RV64-ZFH-LP64-NEXT: fsh fa5, 64(sp) ; RV64-ZFH-LP64-NEXT: call callee_half_on_stack ; RV64-ZFH-LP64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload @@ -1231,32 +1231,32 @@ define i32 @caller_half_on_stack_exhausted_gprs_fprs() nounwind { ; RV64-ZFH-LP64F: # %bb.0: ; RV64-ZFH-LP64F-NEXT: addi sp, sp, -16 ; RV64-ZFH-LP64F-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-ZFH-LP64F-NEXT: lui a7, 5 -; RV64-ZFH-LP64F-NEXT: lui a0, 260096 +; RV64-ZFH-LP64F-NEXT: lui a0, 5 +; RV64-ZFH-LP64F-NEXT: lui a1, 260096 +; RV64-ZFH-LP64F-NEXT: addi a0, a0, -1792 +; RV64-ZFH-LP64F-NEXT: fmv.w.x fa0, a1 ; RV64-ZFH-LP64F-NEXT: lui a1, 262144 -; RV64-ZFH-LP64F-NEXT: lui a2, 263168 -; RV64-ZFH-LP64F-NEXT: lui a3, 264192 -; RV64-ZFH-LP64F-NEXT: lui a4, 264704 -; RV64-ZFH-LP64F-NEXT: lui a5, 265216 -; RV64-ZFH-LP64F-NEXT: lui a6, 265728 -; RV64-ZFH-LP64F-NEXT: fmv.w.x fa0, a0 -; 
RV64-ZFH-LP64F-NEXT: lui t0, 266240 +; RV64-ZFH-LP64F-NEXT: fmv.h.x ft0, a0 ; RV64-ZFH-LP64F-NEXT: fmv.w.x fa1, a1 +; RV64-ZFH-LP64F-NEXT: lui a0, 263168 +; RV64-ZFH-LP64F-NEXT: lui a1, 264192 +; RV64-ZFH-LP64F-NEXT: fmv.w.x fa2, a0 +; RV64-ZFH-LP64F-NEXT: fmv.w.x fa3, a1 +; RV64-ZFH-LP64F-NEXT: lui a0, 264704 +; RV64-ZFH-LP64F-NEXT: lui a1, 265216 +; RV64-ZFH-LP64F-NEXT: fmv.w.x fa4, a0 +; RV64-ZFH-LP64F-NEXT: fmv.w.x fa5, a1 +; RV64-ZFH-LP64F-NEXT: lui a0, 265728 +; RV64-ZFH-LP64F-NEXT: lui a1, 266240 +; RV64-ZFH-LP64F-NEXT: fmv.w.x fa6, a0 +; RV64-ZFH-LP64F-NEXT: fmv.w.x fa7, a1 ; RV64-ZFH-LP64F-NEXT: li a0, 1 -; RV64-ZFH-LP64F-NEXT: fmv.w.x fa2, a2 ; RV64-ZFH-LP64F-NEXT: li a1, 2 -; RV64-ZFH-LP64F-NEXT: fmv.w.x fa3, a3 ; RV64-ZFH-LP64F-NEXT: li a2, 3 -; RV64-ZFH-LP64F-NEXT: fmv.w.x fa4, a4 ; RV64-ZFH-LP64F-NEXT: li a3, 4 -; RV64-ZFH-LP64F-NEXT: fmv.w.x fa5, a5 ; RV64-ZFH-LP64F-NEXT: li a4, 5 -; RV64-ZFH-LP64F-NEXT: fmv.w.x fa6, a6 ; RV64-ZFH-LP64F-NEXT: li a5, 6 -; RV64-ZFH-LP64F-NEXT: fmv.w.x fa7, t0 ; RV64-ZFH-LP64F-NEXT: li a6, 7 -; RV64-ZFH-LP64F-NEXT: addi a7, a7, -1792 -; RV64-ZFH-LP64F-NEXT: fmv.h.x ft0, a7 ; RV64-ZFH-LP64F-NEXT: li a7, 8 ; RV64-ZFH-LP64F-NEXT: fsh ft0, 0(sp) ; RV64-ZFH-LP64F-NEXT: call callee_half_on_stack diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll index 9387b7ef4c32e..8518b0e576025 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll @@ -145,45 +145,43 @@ define void @caller_aligned_stack() nounwind { ; RV32I-FPELIM: # %bb.0: ; RV32I-FPELIM-NEXT: addi sp, sp, -64 ; RV32I-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32I-FPELIM-NEXT: sw s0, 56(sp) # 4-byte Folded Spill -; RV32I-FPELIM-NEXT: li a5, 18 -; RV32I-FPELIM-NEXT: li a6, 17 -; RV32I-FPELIM-NEXT: li a7, 16 -; RV32I-FPELIM-NEXT: lui t0, 262236 -; RV32I-FPELIM-NEXT: lui t1, 377487 -; 
RV32I-FPELIM-NEXT: li t2, 15 -; RV32I-FPELIM-NEXT: lui t3, 262153 -; RV32I-FPELIM-NEXT: lui t4, 545260 -; RV32I-FPELIM-NEXT: lui t5, 964690 -; RV32I-FPELIM-NEXT: lui t6, 335544 -; RV32I-FPELIM-NEXT: lui s0, 688509 +; RV32I-FPELIM-NEXT: li a0, 17 +; RV32I-FPELIM-NEXT: li a1, 18 +; RV32I-FPELIM-NEXT: sw a0, 20(sp) +; RV32I-FPELIM-NEXT: sw a1, 24(sp) +; RV32I-FPELIM-NEXT: lui a0, 262236 +; RV32I-FPELIM-NEXT: li a1, 16 +; RV32I-FPELIM-NEXT: addi a0, a0, 655 +; RV32I-FPELIM-NEXT: lui a2, 377487 +; RV32I-FPELIM-NEXT: li a3, 15 +; RV32I-FPELIM-NEXT: addi a2, a2, 1475 +; RV32I-FPELIM-NEXT: sw a3, 0(sp) +; RV32I-FPELIM-NEXT: sw a2, 8(sp) +; RV32I-FPELIM-NEXT: sw a0, 12(sp) +; RV32I-FPELIM-NEXT: sw a1, 16(sp) +; RV32I-FPELIM-NEXT: lui a0, 262153 +; RV32I-FPELIM-NEXT: addi t0, a0, 491 +; RV32I-FPELIM-NEXT: lui a0, 545260 +; RV32I-FPELIM-NEXT: addi t1, a0, -1967 +; RV32I-FPELIM-NEXT: lui a0, 964690 +; RV32I-FPELIM-NEXT: addi t2, a0, -328 +; RV32I-FPELIM-NEXT: lui a0, 335544 +; RV32I-FPELIM-NEXT: addi t3, a0, 1311 +; RV32I-FPELIM-NEXT: lui a5, 688509 +; RV32I-FPELIM-NEXT: addi a5, a5, -2048 ; RV32I-FPELIM-NEXT: li a0, 1 ; RV32I-FPELIM-NEXT: li a1, 11 ; RV32I-FPELIM-NEXT: addi a2, sp, 32 ; RV32I-FPELIM-NEXT: li a3, 12 ; RV32I-FPELIM-NEXT: li a4, 13 -; RV32I-FPELIM-NEXT: sw a6, 20(sp) -; RV32I-FPELIM-NEXT: sw a5, 24(sp) ; RV32I-FPELIM-NEXT: li a6, 4 -; RV32I-FPELIM-NEXT: addi a5, t0, 655 -; RV32I-FPELIM-NEXT: addi t0, t1, 1475 -; RV32I-FPELIM-NEXT: sw t2, 0(sp) -; RV32I-FPELIM-NEXT: sw t0, 8(sp) -; RV32I-FPELIM-NEXT: sw a5, 12(sp) -; RV32I-FPELIM-NEXT: sw a7, 16(sp) ; RV32I-FPELIM-NEXT: li a7, 14 -; RV32I-FPELIM-NEXT: addi t0, t3, 491 -; RV32I-FPELIM-NEXT: addi t1, t4, -1967 -; RV32I-FPELIM-NEXT: addi t2, t5, -328 -; RV32I-FPELIM-NEXT: addi t3, t6, 1311 -; RV32I-FPELIM-NEXT: addi a5, s0, -2048 ; RV32I-FPELIM-NEXT: sw t3, 32(sp) ; RV32I-FPELIM-NEXT: sw t2, 36(sp) ; RV32I-FPELIM-NEXT: sw t1, 40(sp) ; RV32I-FPELIM-NEXT: sw t0, 44(sp) ; RV32I-FPELIM-NEXT: call callee_aligned_stack ; 
RV32I-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32I-FPELIM-NEXT: lw s0, 56(sp) # 4-byte Folded Reload ; RV32I-FPELIM-NEXT: addi sp, sp, 64 ; RV32I-FPELIM-NEXT: ret ; @@ -192,39 +190,38 @@ define void @caller_aligned_stack() nounwind { ; RV32I-WITHFP-NEXT: addi sp, sp, -64 ; RV32I-WITHFP-NEXT: sw ra, 60(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill -; RV32I-WITHFP-NEXT: sw s1, 52(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 64 -; RV32I-WITHFP-NEXT: li a5, 18 -; RV32I-WITHFP-NEXT: li a6, 17 -; RV32I-WITHFP-NEXT: li a7, 16 -; RV32I-WITHFP-NEXT: lui t0, 262236 -; RV32I-WITHFP-NEXT: lui t1, 377487 -; RV32I-WITHFP-NEXT: li t2, 15 -; RV32I-WITHFP-NEXT: lui t3, 262153 -; RV32I-WITHFP-NEXT: lui t4, 545260 -; RV32I-WITHFP-NEXT: lui t5, 964690 -; RV32I-WITHFP-NEXT: lui t6, 335544 -; RV32I-WITHFP-NEXT: lui s1, 688509 +; RV32I-WITHFP-NEXT: li a0, 17 +; RV32I-WITHFP-NEXT: li a1, 18 +; RV32I-WITHFP-NEXT: sw a0, 20(sp) +; RV32I-WITHFP-NEXT: sw a1, 24(sp) +; RV32I-WITHFP-NEXT: lui a0, 262236 +; RV32I-WITHFP-NEXT: li a1, 16 +; RV32I-WITHFP-NEXT: addi a0, a0, 655 +; RV32I-WITHFP-NEXT: lui a2, 377487 +; RV32I-WITHFP-NEXT: li a3, 15 +; RV32I-WITHFP-NEXT: addi a2, a2, 1475 +; RV32I-WITHFP-NEXT: sw a3, 0(sp) +; RV32I-WITHFP-NEXT: sw a2, 8(sp) +; RV32I-WITHFP-NEXT: sw a0, 12(sp) +; RV32I-WITHFP-NEXT: sw a1, 16(sp) +; RV32I-WITHFP-NEXT: lui a0, 262153 +; RV32I-WITHFP-NEXT: addi t0, a0, 491 +; RV32I-WITHFP-NEXT: lui a0, 545260 +; RV32I-WITHFP-NEXT: addi t1, a0, -1967 +; RV32I-WITHFP-NEXT: lui a0, 964690 +; RV32I-WITHFP-NEXT: addi t2, a0, -328 +; RV32I-WITHFP-NEXT: lui a0, 335544 +; RV32I-WITHFP-NEXT: addi t3, a0, 1311 +; RV32I-WITHFP-NEXT: lui a5, 688509 +; RV32I-WITHFP-NEXT: addi a5, a5, -2048 ; RV32I-WITHFP-NEXT: li a0, 1 ; RV32I-WITHFP-NEXT: li a1, 11 ; RV32I-WITHFP-NEXT: addi a2, s0, -32 ; RV32I-WITHFP-NEXT: li a3, 12 ; RV32I-WITHFP-NEXT: li a4, 13 -; RV32I-WITHFP-NEXT: sw a6, 20(sp) -; RV32I-WITHFP-NEXT: sw a5, 24(sp) ; 
RV32I-WITHFP-NEXT: li a6, 4 -; RV32I-WITHFP-NEXT: addi a5, t0, 655 -; RV32I-WITHFP-NEXT: addi t0, t1, 1475 -; RV32I-WITHFP-NEXT: sw t2, 0(sp) -; RV32I-WITHFP-NEXT: sw t0, 8(sp) -; RV32I-WITHFP-NEXT: sw a5, 12(sp) -; RV32I-WITHFP-NEXT: sw a7, 16(sp) ; RV32I-WITHFP-NEXT: li a7, 14 -; RV32I-WITHFP-NEXT: addi t0, t3, 491 -; RV32I-WITHFP-NEXT: addi t1, t4, -1967 -; RV32I-WITHFP-NEXT: addi t2, t5, -328 -; RV32I-WITHFP-NEXT: addi t3, t6, 1311 -; RV32I-WITHFP-NEXT: addi a5, s1, -2048 ; RV32I-WITHFP-NEXT: sw t3, -32(s0) ; RV32I-WITHFP-NEXT: sw t2, -28(s0) ; RV32I-WITHFP-NEXT: sw t1, -24(s0) @@ -232,7 +229,6 @@ define void @caller_aligned_stack() nounwind { ; RV32I-WITHFP-NEXT: call callee_aligned_stack ; RV32I-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload -; RV32I-WITHFP-NEXT: lw s1, 52(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: addi sp, sp, 64 ; RV32I-WITHFP-NEXT: ret %1 = call i32 @callee_aligned_stack(i32 1, i32 11, diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll index 8149179c6412d..9568062e90b5e 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll @@ -82,20 +82,20 @@ define i32 @caller_i64_in_regs() nounwind { define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i32 %e, i32 %f, i64 %g, i32 %h) nounwind { ; RV32I-FPELIM-LABEL: callee_many_scalars: ; RV32I-FPELIM: # %bb.0: -; RV32I-FPELIM-NEXT: lw t0, 4(sp) -; RV32I-FPELIM-NEXT: lw t1, 0(sp) +; RV32I-FPELIM-NEXT: lw t0, 0(sp) +; RV32I-FPELIM-NEXT: lw t1, 4(sp) ; RV32I-FPELIM-NEXT: zext.b a0, a0 ; RV32I-FPELIM-NEXT: slli a1, a1, 16 -; RV32I-FPELIM-NEXT: xor a3, a3, a7 ; RV32I-FPELIM-NEXT: srli a1, a1, 16 ; RV32I-FPELIM-NEXT: add a0, a0, a2 ; RV32I-FPELIM-NEXT: add a0, a0, a1 +; RV32I-FPELIM-NEXT: xor a1, a4, t0 +; RV32I-FPELIM-NEXT: xor a2, a3, a7 ; 
RV32I-FPELIM-NEXT: add a0, a0, a5 -; RV32I-FPELIM-NEXT: xor a1, a4, t1 +; RV32I-FPELIM-NEXT: or a1, a2, a1 ; RV32I-FPELIM-NEXT: add a0, a0, a6 -; RV32I-FPELIM-NEXT: or a1, a3, a1 ; RV32I-FPELIM-NEXT: seqz a1, a1 -; RV32I-FPELIM-NEXT: add a0, a0, t0 +; RV32I-FPELIM-NEXT: add a0, a0, t1 ; RV32I-FPELIM-NEXT: add a0, a1, a0 ; RV32I-FPELIM-NEXT: ret ; @@ -105,20 +105,20 @@ define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i32 %e, i32 %f, i ; RV32I-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 16 -; RV32I-WITHFP-NEXT: lw t0, 4(s0) -; RV32I-WITHFP-NEXT: lw t1, 0(s0) +; RV32I-WITHFP-NEXT: lw t0, 0(s0) +; RV32I-WITHFP-NEXT: lw t1, 4(s0) ; RV32I-WITHFP-NEXT: zext.b a0, a0 ; RV32I-WITHFP-NEXT: slli a1, a1, 16 -; RV32I-WITHFP-NEXT: xor a3, a3, a7 ; RV32I-WITHFP-NEXT: srli a1, a1, 16 ; RV32I-WITHFP-NEXT: add a0, a0, a2 ; RV32I-WITHFP-NEXT: add a0, a0, a1 +; RV32I-WITHFP-NEXT: xor a1, a4, t0 +; RV32I-WITHFP-NEXT: xor a2, a3, a7 ; RV32I-WITHFP-NEXT: add a0, a0, a5 -; RV32I-WITHFP-NEXT: xor a1, a4, t1 +; RV32I-WITHFP-NEXT: or a1, a2, a1 ; RV32I-WITHFP-NEXT: add a0, a0, a6 -; RV32I-WITHFP-NEXT: or a1, a3, a1 ; RV32I-WITHFP-NEXT: seqz a1, a1 -; RV32I-WITHFP-NEXT: add a0, a0, t0 +; RV32I-WITHFP-NEXT: add a0, a0, t1 ; RV32I-WITHFP-NEXT: add a0, a1, a0 ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -194,16 +194,16 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) nounwind { ; RV32I-FPELIM-NEXT: lw a3, 4(a1) ; RV32I-FPELIM-NEXT: lw a4, 8(a1) ; RV32I-FPELIM-NEXT: lw a1, 12(a1) -; RV32I-FPELIM-NEXT: lw a5, 12(a0) -; RV32I-FPELIM-NEXT: lw a6, 4(a0) -; RV32I-FPELIM-NEXT: lw a7, 8(a0) -; RV32I-FPELIM-NEXT: lw a0, 0(a0) -; RV32I-FPELIM-NEXT: xor a1, a5, a1 -; RV32I-FPELIM-NEXT: xor a3, a6, a3 -; RV32I-FPELIM-NEXT: xor a4, a7, a4 -; RV32I-FPELIM-NEXT: xor a0, a0, a2 +; RV32I-FPELIM-NEXT: lw a5, 4(a0) +; 
RV32I-FPELIM-NEXT: lw a6, 12(a0) +; RV32I-FPELIM-NEXT: lw a7, 0(a0) +; RV32I-FPELIM-NEXT: lw a0, 8(a0) +; RV32I-FPELIM-NEXT: xor a1, a6, a1 +; RV32I-FPELIM-NEXT: xor a3, a5, a3 +; RV32I-FPELIM-NEXT: xor a0, a0, a4 +; RV32I-FPELIM-NEXT: xor a2, a7, a2 ; RV32I-FPELIM-NEXT: or a1, a3, a1 -; RV32I-FPELIM-NEXT: or a0, a0, a4 +; RV32I-FPELIM-NEXT: or a0, a2, a0 ; RV32I-FPELIM-NEXT: or a0, a0, a1 ; RV32I-FPELIM-NEXT: seqz a0, a0 ; RV32I-FPELIM-NEXT: ret @@ -218,16 +218,16 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) nounwind { ; RV32I-WITHFP-NEXT: lw a3, 4(a1) ; RV32I-WITHFP-NEXT: lw a4, 8(a1) ; RV32I-WITHFP-NEXT: lw a1, 12(a1) -; RV32I-WITHFP-NEXT: lw a5, 12(a0) -; RV32I-WITHFP-NEXT: lw a6, 4(a0) -; RV32I-WITHFP-NEXT: lw a7, 8(a0) -; RV32I-WITHFP-NEXT: lw a0, 0(a0) -; RV32I-WITHFP-NEXT: xor a1, a5, a1 -; RV32I-WITHFP-NEXT: xor a3, a6, a3 -; RV32I-WITHFP-NEXT: xor a4, a7, a4 -; RV32I-WITHFP-NEXT: xor a0, a0, a2 +; RV32I-WITHFP-NEXT: lw a5, 4(a0) +; RV32I-WITHFP-NEXT: lw a6, 12(a0) +; RV32I-WITHFP-NEXT: lw a7, 0(a0) +; RV32I-WITHFP-NEXT: lw a0, 8(a0) +; RV32I-WITHFP-NEXT: xor a1, a6, a1 +; RV32I-WITHFP-NEXT: xor a3, a5, a3 +; RV32I-WITHFP-NEXT: xor a0, a0, a4 +; RV32I-WITHFP-NEXT: xor a2, a7, a2 ; RV32I-WITHFP-NEXT: or a1, a3, a1 -; RV32I-WITHFP-NEXT: or a0, a0, a4 +; RV32I-WITHFP-NEXT: or a0, a2, a0 ; RV32I-WITHFP-NEXT: or a0, a0, a1 ; RV32I-WITHFP-NEXT: seqz a0, a0 ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -245,13 +245,13 @@ define i32 @caller_large_scalars() nounwind { ; RV32I-FPELIM: # %bb.0: ; RV32I-FPELIM-NEXT: addi sp, sp, -48 ; RV32I-FPELIM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32I-FPELIM-NEXT: lui a1, 524272 -; RV32I-FPELIM-NEXT: li a2, 1 -; RV32I-FPELIM-NEXT: addi a0, sp, 24 +; RV32I-FPELIM-NEXT: lui a0, 524272 ; RV32I-FPELIM-NEXT: sw zero, 0(sp) ; RV32I-FPELIM-NEXT: sw zero, 4(sp) ; RV32I-FPELIM-NEXT: sw zero, 8(sp) -; RV32I-FPELIM-NEXT: sw a1, 12(sp) +; RV32I-FPELIM-NEXT: sw a0, 12(sp) +; RV32I-FPELIM-NEXT: li a2, 1 +; 
RV32I-FPELIM-NEXT: addi a0, sp, 24 ; RV32I-FPELIM-NEXT: mv a1, sp ; RV32I-FPELIM-NEXT: sw a2, 24(sp) ; RV32I-FPELIM-NEXT: sw zero, 28(sp) @@ -268,13 +268,13 @@ define i32 @caller_large_scalars() nounwind { ; RV32I-WITHFP-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: sw s0, 40(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 48 -; RV32I-WITHFP-NEXT: lui a1, 524272 -; RV32I-WITHFP-NEXT: li a2, 1 -; RV32I-WITHFP-NEXT: addi a0, s0, -24 +; RV32I-WITHFP-NEXT: lui a0, 524272 ; RV32I-WITHFP-NEXT: sw zero, -48(s0) ; RV32I-WITHFP-NEXT: sw zero, -44(s0) ; RV32I-WITHFP-NEXT: sw zero, -40(s0) -; RV32I-WITHFP-NEXT: sw a1, -36(s0) +; RV32I-WITHFP-NEXT: sw a0, -36(s0) +; RV32I-WITHFP-NEXT: li a2, 1 +; RV32I-WITHFP-NEXT: addi a0, s0, -24 ; RV32I-WITHFP-NEXT: addi a1, s0, -48 ; RV32I-WITHFP-NEXT: sw a2, -24(s0) ; RV32I-WITHFP-NEXT: sw zero, -20(s0) @@ -301,16 +301,16 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, ; RV32I-FPELIM-NEXT: lw a2, 4(a7) ; RV32I-FPELIM-NEXT: lw a3, 8(a7) ; RV32I-FPELIM-NEXT: lw a4, 12(a7) -; RV32I-FPELIM-NEXT: lw a5, 12(a0) -; RV32I-FPELIM-NEXT: lw a6, 4(a0) -; RV32I-FPELIM-NEXT: lw a7, 8(a0) -; RV32I-FPELIM-NEXT: lw a0, 0(a0) -; RV32I-FPELIM-NEXT: xor a4, a4, a5 -; RV32I-FPELIM-NEXT: xor a2, a2, a6 -; RV32I-FPELIM-NEXT: xor a3, a3, a7 -; RV32I-FPELIM-NEXT: xor a0, a1, a0 +; RV32I-FPELIM-NEXT: lw a5, 4(a0) +; RV32I-FPELIM-NEXT: lw a6, 12(a0) +; RV32I-FPELIM-NEXT: lw a7, 0(a0) +; RV32I-FPELIM-NEXT: lw a0, 8(a0) +; RV32I-FPELIM-NEXT: xor a4, a4, a6 +; RV32I-FPELIM-NEXT: xor a2, a2, a5 +; RV32I-FPELIM-NEXT: xor a0, a3, a0 +; RV32I-FPELIM-NEXT: xor a1, a1, a7 ; RV32I-FPELIM-NEXT: or a2, a2, a4 -; RV32I-FPELIM-NEXT: or a0, a0, a3 +; RV32I-FPELIM-NEXT: or a0, a1, a0 ; RV32I-FPELIM-NEXT: or a0, a0, a2 ; RV32I-FPELIM-NEXT: seqz a0, a0 ; RV32I-FPELIM-NEXT: ret @@ -326,16 +326,16 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, ; RV32I-WITHFP-NEXT: lw a2, 4(a7) ; 
RV32I-WITHFP-NEXT: lw a3, 8(a7) ; RV32I-WITHFP-NEXT: lw a4, 12(a7) -; RV32I-WITHFP-NEXT: lw a5, 12(a0) -; RV32I-WITHFP-NEXT: lw a6, 4(a0) -; RV32I-WITHFP-NEXT: lw a7, 8(a0) -; RV32I-WITHFP-NEXT: lw a0, 0(a0) -; RV32I-WITHFP-NEXT: xor a4, a4, a5 -; RV32I-WITHFP-NEXT: xor a2, a2, a6 -; RV32I-WITHFP-NEXT: xor a3, a3, a7 -; RV32I-WITHFP-NEXT: xor a0, a1, a0 +; RV32I-WITHFP-NEXT: lw a5, 4(a0) +; RV32I-WITHFP-NEXT: lw a6, 12(a0) +; RV32I-WITHFP-NEXT: lw a7, 0(a0) +; RV32I-WITHFP-NEXT: lw a0, 8(a0) +; RV32I-WITHFP-NEXT: xor a4, a4, a6 +; RV32I-WITHFP-NEXT: xor a2, a2, a5 +; RV32I-WITHFP-NEXT: xor a0, a3, a0 +; RV32I-WITHFP-NEXT: xor a1, a1, a7 ; RV32I-WITHFP-NEXT: or a2, a2, a4 -; RV32I-WITHFP-NEXT: or a0, a0, a3 +; RV32I-WITHFP-NEXT: or a0, a1, a0 ; RV32I-WITHFP-NEXT: or a0, a0, a2 ; RV32I-WITHFP-NEXT: seqz a0, a0 ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -353,25 +353,25 @@ define i32 @caller_large_scalars_exhausted_regs() nounwind { ; RV32I-FPELIM: # %bb.0: ; RV32I-FPELIM-NEXT: addi sp, sp, -64 ; RV32I-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32I-FPELIM-NEXT: addi a6, sp, 16 -; RV32I-FPELIM-NEXT: li a7, 9 -; RV32I-FPELIM-NEXT: lui t0, 524272 -; RV32I-FPELIM-NEXT: li t1, 8 +; RV32I-FPELIM-NEXT: addi a0, sp, 16 +; RV32I-FPELIM-NEXT: li a1, 9 +; RV32I-FPELIM-NEXT: sw a1, 0(sp) +; RV32I-FPELIM-NEXT: sw a0, 4(sp) +; RV32I-FPELIM-NEXT: lui a0, 524272 +; RV32I-FPELIM-NEXT: sw zero, 16(sp) +; RV32I-FPELIM-NEXT: sw zero, 20(sp) +; RV32I-FPELIM-NEXT: sw zero, 24(sp) +; RV32I-FPELIM-NEXT: sw a0, 28(sp) +; RV32I-FPELIM-NEXT: li t0, 8 ; RV32I-FPELIM-NEXT: li a0, 1 ; RV32I-FPELIM-NEXT: li a1, 2 ; RV32I-FPELIM-NEXT: li a2, 3 ; RV32I-FPELIM-NEXT: li a3, 4 ; RV32I-FPELIM-NEXT: li a4, 5 ; RV32I-FPELIM-NEXT: li a5, 6 -; RV32I-FPELIM-NEXT: sw a7, 0(sp) -; RV32I-FPELIM-NEXT: sw a6, 4(sp) ; RV32I-FPELIM-NEXT: li a6, 7 -; RV32I-FPELIM-NEXT: sw zero, 16(sp) -; RV32I-FPELIM-NEXT: sw zero, 20(sp) -; RV32I-FPELIM-NEXT: sw zero, 24(sp) -; RV32I-FPELIM-NEXT: sw t0, 
28(sp) ; RV32I-FPELIM-NEXT: addi a7, sp, 40 -; RV32I-FPELIM-NEXT: sw t1, 40(sp) +; RV32I-FPELIM-NEXT: sw t0, 40(sp) ; RV32I-FPELIM-NEXT: sw zero, 44(sp) ; RV32I-FPELIM-NEXT: sw zero, 48(sp) ; RV32I-FPELIM-NEXT: sw zero, 52(sp) @@ -386,25 +386,25 @@ define i32 @caller_large_scalars_exhausted_regs() nounwind { ; RV32I-WITHFP-NEXT: sw ra, 60(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 64 -; RV32I-WITHFP-NEXT: addi a6, s0, -48 -; RV32I-WITHFP-NEXT: li a7, 9 -; RV32I-WITHFP-NEXT: lui t0, 524272 -; RV32I-WITHFP-NEXT: li t1, 8 +; RV32I-WITHFP-NEXT: addi a0, s0, -48 +; RV32I-WITHFP-NEXT: li a1, 9 +; RV32I-WITHFP-NEXT: sw a1, 0(sp) +; RV32I-WITHFP-NEXT: sw a0, 4(sp) +; RV32I-WITHFP-NEXT: lui a0, 524272 +; RV32I-WITHFP-NEXT: sw zero, -48(s0) +; RV32I-WITHFP-NEXT: sw zero, -44(s0) +; RV32I-WITHFP-NEXT: sw zero, -40(s0) +; RV32I-WITHFP-NEXT: sw a0, -36(s0) +; RV32I-WITHFP-NEXT: li t0, 8 ; RV32I-WITHFP-NEXT: li a0, 1 ; RV32I-WITHFP-NEXT: li a1, 2 ; RV32I-WITHFP-NEXT: li a2, 3 ; RV32I-WITHFP-NEXT: li a3, 4 ; RV32I-WITHFP-NEXT: li a4, 5 ; RV32I-WITHFP-NEXT: li a5, 6 -; RV32I-WITHFP-NEXT: sw a7, 0(sp) -; RV32I-WITHFP-NEXT: sw a6, 4(sp) ; RV32I-WITHFP-NEXT: li a6, 7 -; RV32I-WITHFP-NEXT: sw zero, -48(s0) -; RV32I-WITHFP-NEXT: sw zero, -44(s0) -; RV32I-WITHFP-NEXT: sw zero, -40(s0) -; RV32I-WITHFP-NEXT: sw t0, -36(s0) ; RV32I-WITHFP-NEXT: addi a7, s0, -24 -; RV32I-WITHFP-NEXT: sw t1, -24(s0) +; RV32I-WITHFP-NEXT: sw t0, -24(s0) ; RV32I-WITHFP-NEXT: sw zero, -20(s0) ; RV32I-WITHFP-NEXT: sw zero, -16(s0) ; RV32I-WITHFP-NEXT: sw zero, -12(s0) @@ -664,34 +664,34 @@ define void @caller_aligned_stack() nounwind { ; RV32I-FPELIM: # %bb.0: ; RV32I-FPELIM-NEXT: addi sp, sp, -64 ; RV32I-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32I-FPELIM-NEXT: li a5, 19 -; RV32I-FPELIM-NEXT: li a6, 18 -; RV32I-FPELIM-NEXT: li a7, 17 -; RV32I-FPELIM-NEXT: li t0, 16 -; RV32I-FPELIM-NEXT: li t1, 15 -; RV32I-FPELIM-NEXT: lui t2, 
262153 -; RV32I-FPELIM-NEXT: lui t3, 545260 -; RV32I-FPELIM-NEXT: lui t4, 964690 -; RV32I-FPELIM-NEXT: lui t5, 335544 -; RV32I-FPELIM-NEXT: lui t6, 688509 +; RV32I-FPELIM-NEXT: li a0, 19 +; RV32I-FPELIM-NEXT: li a1, 18 +; RV32I-FPELIM-NEXT: sw a1, 20(sp) +; RV32I-FPELIM-NEXT: sw a0, 24(sp) +; RV32I-FPELIM-NEXT: li a0, 17 +; RV32I-FPELIM-NEXT: li a1, 15 +; RV32I-FPELIM-NEXT: li a2, 16 +; RV32I-FPELIM-NEXT: sw a1, 0(sp) +; RV32I-FPELIM-NEXT: sw a2, 8(sp) +; RV32I-FPELIM-NEXT: sw zero, 12(sp) +; RV32I-FPELIM-NEXT: sw a0, 16(sp) +; RV32I-FPELIM-NEXT: lui a0, 262153 +; RV32I-FPELIM-NEXT: addi t0, a0, 491 +; RV32I-FPELIM-NEXT: lui a0, 545260 +; RV32I-FPELIM-NEXT: addi t1, a0, -1967 +; RV32I-FPELIM-NEXT: lui a0, 964690 +; RV32I-FPELIM-NEXT: addi t2, a0, -328 +; RV32I-FPELIM-NEXT: lui a0, 335544 +; RV32I-FPELIM-NEXT: addi t3, a0, 1311 +; RV32I-FPELIM-NEXT: lui a5, 688509 +; RV32I-FPELIM-NEXT: addi a5, a5, -2048 ; RV32I-FPELIM-NEXT: li a0, 1 ; RV32I-FPELIM-NEXT: li a1, 11 ; RV32I-FPELIM-NEXT: addi a2, sp, 32 ; RV32I-FPELIM-NEXT: li a3, 12 ; RV32I-FPELIM-NEXT: li a4, 13 -; RV32I-FPELIM-NEXT: sw a6, 20(sp) -; RV32I-FPELIM-NEXT: sw a5, 24(sp) ; RV32I-FPELIM-NEXT: li a6, 4 -; RV32I-FPELIM-NEXT: sw t1, 0(sp) -; RV32I-FPELIM-NEXT: sw t0, 8(sp) -; RV32I-FPELIM-NEXT: sw zero, 12(sp) -; RV32I-FPELIM-NEXT: sw a7, 16(sp) ; RV32I-FPELIM-NEXT: li a7, 14 -; RV32I-FPELIM-NEXT: addi t0, t2, 491 -; RV32I-FPELIM-NEXT: addi t1, t3, -1967 -; RV32I-FPELIM-NEXT: addi t2, t4, -328 -; RV32I-FPELIM-NEXT: addi t3, t5, 1311 -; RV32I-FPELIM-NEXT: addi a5, t6, -2048 ; RV32I-FPELIM-NEXT: sw t3, 32(sp) ; RV32I-FPELIM-NEXT: sw t2, 36(sp) ; RV32I-FPELIM-NEXT: sw t1, 40(sp) @@ -707,34 +707,34 @@ define void @caller_aligned_stack() nounwind { ; RV32I-WITHFP-NEXT: sw ra, 60(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 64 -; RV32I-WITHFP-NEXT: li a5, 19 -; RV32I-WITHFP-NEXT: li a6, 18 -; RV32I-WITHFP-NEXT: li a7, 17 -; RV32I-WITHFP-NEXT: 
li t0, 16 -; RV32I-WITHFP-NEXT: li t1, 15 -; RV32I-WITHFP-NEXT: lui t2, 262153 -; RV32I-WITHFP-NEXT: lui t3, 545260 -; RV32I-WITHFP-NEXT: lui t4, 964690 -; RV32I-WITHFP-NEXT: lui t5, 335544 -; RV32I-WITHFP-NEXT: lui t6, 688509 +; RV32I-WITHFP-NEXT: li a0, 19 +; RV32I-WITHFP-NEXT: li a1, 18 +; RV32I-WITHFP-NEXT: sw a1, 20(sp) +; RV32I-WITHFP-NEXT: sw a0, 24(sp) +; RV32I-WITHFP-NEXT: li a0, 17 +; RV32I-WITHFP-NEXT: li a1, 15 +; RV32I-WITHFP-NEXT: li a2, 16 +; RV32I-WITHFP-NEXT: sw a1, 0(sp) +; RV32I-WITHFP-NEXT: sw a2, 8(sp) +; RV32I-WITHFP-NEXT: sw zero, 12(sp) +; RV32I-WITHFP-NEXT: sw a0, 16(sp) +; RV32I-WITHFP-NEXT: lui a0, 262153 +; RV32I-WITHFP-NEXT: addi t0, a0, 491 +; RV32I-WITHFP-NEXT: lui a0, 545260 +; RV32I-WITHFP-NEXT: addi t1, a0, -1967 +; RV32I-WITHFP-NEXT: lui a0, 964690 +; RV32I-WITHFP-NEXT: addi t2, a0, -328 +; RV32I-WITHFP-NEXT: lui a0, 335544 +; RV32I-WITHFP-NEXT: addi t3, a0, 1311 +; RV32I-WITHFP-NEXT: lui a5, 688509 +; RV32I-WITHFP-NEXT: addi a5, a5, -2048 ; RV32I-WITHFP-NEXT: li a0, 1 ; RV32I-WITHFP-NEXT: li a1, 11 ; RV32I-WITHFP-NEXT: addi a2, s0, -32 ; RV32I-WITHFP-NEXT: li a3, 12 ; RV32I-WITHFP-NEXT: li a4, 13 -; RV32I-WITHFP-NEXT: sw a6, 20(sp) -; RV32I-WITHFP-NEXT: sw a5, 24(sp) ; RV32I-WITHFP-NEXT: li a6, 4 -; RV32I-WITHFP-NEXT: sw t1, 0(sp) -; RV32I-WITHFP-NEXT: sw t0, 8(sp) -; RV32I-WITHFP-NEXT: sw zero, 12(sp) -; RV32I-WITHFP-NEXT: sw a7, 16(sp) ; RV32I-WITHFP-NEXT: li a7, 14 -; RV32I-WITHFP-NEXT: addi t0, t2, 491 -; RV32I-WITHFP-NEXT: addi t1, t3, -1967 -; RV32I-WITHFP-NEXT: addi t2, t4, -328 -; RV32I-WITHFP-NEXT: addi t3, t5, 1311 -; RV32I-WITHFP-NEXT: addi a5, t6, -2048 ; RV32I-WITHFP-NEXT: sw t3, -32(s0) ; RV32I-WITHFP-NEXT: sw t2, -28(s0) ; RV32I-WITHFP-NEXT: sw t1, -24(s0) @@ -784,11 +784,11 @@ define i32 @caller_small_scalar_ret() nounwind { ; RV32I-FPELIM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-FPELIM-NEXT: call callee_small_scalar_ret ; RV32I-FPELIM-NEXT: lui a2, 56 +; RV32I-FPELIM-NEXT: lui a3, 200614 ; 
RV32I-FPELIM-NEXT: addi a2, a2, 580 +; RV32I-FPELIM-NEXT: addi a3, a3, 647 ; RV32I-FPELIM-NEXT: xor a1, a1, a2 -; RV32I-FPELIM-NEXT: lui a2, 200614 -; RV32I-FPELIM-NEXT: addi a2, a2, 647 -; RV32I-FPELIM-NEXT: xor a0, a0, a2 +; RV32I-FPELIM-NEXT: xor a0, a0, a3 ; RV32I-FPELIM-NEXT: or a0, a0, a1 ; RV32I-FPELIM-NEXT: seqz a0, a0 ; RV32I-FPELIM-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -803,11 +803,11 @@ define i32 @caller_small_scalar_ret() nounwind { ; RV32I-WITHFP-NEXT: addi s0, sp, 16 ; RV32I-WITHFP-NEXT: call callee_small_scalar_ret ; RV32I-WITHFP-NEXT: lui a2, 56 +; RV32I-WITHFP-NEXT: lui a3, 200614 ; RV32I-WITHFP-NEXT: addi a2, a2, 580 +; RV32I-WITHFP-NEXT: addi a3, a3, 647 ; RV32I-WITHFP-NEXT: xor a1, a1, a2 -; RV32I-WITHFP-NEXT: lui a2, 200614 -; RV32I-WITHFP-NEXT: addi a2, a2, 647 -; RV32I-WITHFP-NEXT: xor a0, a0, a2 +; RV32I-WITHFP-NEXT: xor a0, a0, a3 ; RV32I-WITHFP-NEXT: or a0, a0, a1 ; RV32I-WITHFP-NEXT: seqz a0, a0 ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32d.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32d.ll index 7630d5b8f77ef..f9d3e86cc84b9 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32d.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32d.ll @@ -37,9 +37,9 @@ define i32 @caller_double_in_fpr() nounwind { define i32 @callee_double_in_fpr_exhausted_gprs(i64 %a, i64 %b, i64 %c, i64 %d, i32 %e, double %f) nounwind { ; RV32-ILP32D-LABEL: callee_double_in_fpr_exhausted_gprs: ; RV32-ILP32D: # %bb.0: -; RV32-ILP32D-NEXT: lw a0, 0(sp) -; RV32-ILP32D-NEXT: fcvt.w.d a1, fa0, rtz -; RV32-ILP32D-NEXT: add a0, a0, a1 +; RV32-ILP32D-NEXT: fcvt.w.d a0, fa0, rtz +; RV32-ILP32D-NEXT: lw a1, 0(sp) +; RV32-ILP32D-NEXT: add a0, a1, a0 ; RV32-ILP32D-NEXT: ret %f_fptosi = fptosi double %f to i32 %1 = add i32 %e, %f_fptosi @@ -51,9 +51,9 @@ define i32 @caller_double_in_fpr_exhausted_gprs() nounwind { ; RV32-ILP32D: # %bb.0: ; RV32-ILP32D-NEXT: addi sp, sp, -16 ; RV32-ILP32D-NEXT: sw ra, 
12(sp) # 4-byte Folded Spill -; RV32-ILP32D-NEXT: li a1, 5 ; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI3_0) ; RV32-ILP32D-NEXT: fld fa0, %lo(.LCPI3_0)(a0) +; RV32-ILP32D-NEXT: li a1, 5 ; RV32-ILP32D-NEXT: li a0, 1 ; RV32-ILP32D-NEXT: li a2, 2 ; RV32-ILP32D-NEXT: li a4, 3 @@ -99,16 +99,16 @@ define i32 @caller_double_in_gpr_exhausted_fprs() nounwind { ; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI5_0) ; RV32-ILP32D-NEXT: lui a1, %hi(.LCPI5_1) ; RV32-ILP32D-NEXT: fld fa0, %lo(.LCPI5_0)(a0) -; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI5_2) ; RV32-ILP32D-NEXT: fld fa1, %lo(.LCPI5_1)(a1) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI5_2) ; RV32-ILP32D-NEXT: lui a1, %hi(.LCPI5_3) ; RV32-ILP32D-NEXT: fld fa2, %lo(.LCPI5_2)(a0) -; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI5_4) ; RV32-ILP32D-NEXT: fld fa3, %lo(.LCPI5_3)(a1) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI5_4) ; RV32-ILP32D-NEXT: lui a1, %hi(.LCPI5_5) ; RV32-ILP32D-NEXT: fld fa4, %lo(.LCPI5_4)(a0) -; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI5_6) ; RV32-ILP32D-NEXT: fld fa5, %lo(.LCPI5_5)(a1) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI5_6) ; RV32-ILP32D-NEXT: lui a1, %hi(.LCPI5_7) ; RV32-ILP32D-NEXT: fld fa6, %lo(.LCPI5_6)(a0) ; RV32-ILP32D-NEXT: fld fa7, %lo(.LCPI5_7)(a1) @@ -147,23 +147,23 @@ define i32 @caller_double_in_gpr_and_stack_almost_exhausted_gprs_fprs() nounwind ; RV32-ILP32D: # %bb.0: ; RV32-ILP32D-NEXT: addi sp, sp, -16 ; RV32-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-ILP32D-NEXT: lui a1, 262816 ; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI7_0) -; RV32-ILP32D-NEXT: lui a2, %hi(.LCPI7_1) -; RV32-ILP32D-NEXT: lui a3, %hi(.LCPI7_2) -; RV32-ILP32D-NEXT: lui a4, %hi(.LCPI7_3) -; RV32-ILP32D-NEXT: lui a5, %hi(.LCPI7_4) -; RV32-ILP32D-NEXT: lui a6, %hi(.LCPI7_5) -; RV32-ILP32D-NEXT: lui a7, %hi(.LCPI7_6) +; RV32-ILP32D-NEXT: lui a1, %hi(.LCPI7_1) ; RV32-ILP32D-NEXT: fld fa0, %lo(.LCPI7_0)(a0) -; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI7_7) -; RV32-ILP32D-NEXT: fld fa1, %lo(.LCPI7_1)(a2) -; RV32-ILP32D-NEXT: fld fa2, %lo(.LCPI7_2)(a3) -; RV32-ILP32D-NEXT: fld 
fa3, %lo(.LCPI7_3)(a4) -; RV32-ILP32D-NEXT: fld fa4, %lo(.LCPI7_4)(a5) -; RV32-ILP32D-NEXT: fld fa5, %lo(.LCPI7_5)(a6) -; RV32-ILP32D-NEXT: fld fa6, %lo(.LCPI7_6)(a7) -; RV32-ILP32D-NEXT: fld fa7, %lo(.LCPI7_7)(a0) +; RV32-ILP32D-NEXT: fld fa1, %lo(.LCPI7_1)(a1) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI7_2) +; RV32-ILP32D-NEXT: lui a1, %hi(.LCPI7_3) +; RV32-ILP32D-NEXT: fld fa2, %lo(.LCPI7_2)(a0) +; RV32-ILP32D-NEXT: fld fa3, %lo(.LCPI7_3)(a1) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI7_4) +; RV32-ILP32D-NEXT: lui a1, %hi(.LCPI7_5) +; RV32-ILP32D-NEXT: fld fa4, %lo(.LCPI7_4)(a0) +; RV32-ILP32D-NEXT: fld fa5, %lo(.LCPI7_5)(a1) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI7_6) +; RV32-ILP32D-NEXT: lui a1, %hi(.LCPI7_7) +; RV32-ILP32D-NEXT: fld fa6, %lo(.LCPI7_6)(a0) +; RV32-ILP32D-NEXT: fld fa7, %lo(.LCPI7_7)(a1) +; RV32-ILP32D-NEXT: lui a1, 262816 ; RV32-ILP32D-NEXT: li a0, 1 ; RV32-ILP32D-NEXT: li a2, 3 ; RV32-ILP32D-NEXT: li a4, 5 @@ -203,24 +203,24 @@ define i32 @caller_double_on_stack_exhausted_gprs_fprs() nounwind { ; RV32-ILP32D: # %bb.0: ; RV32-ILP32D-NEXT: addi sp, sp, -16 ; RV32-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-ILP32D-NEXT: lui a1, 262816 ; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI9_0) -; RV32-ILP32D-NEXT: lui a2, %hi(.LCPI9_1) -; RV32-ILP32D-NEXT: lui a3, %hi(.LCPI9_2) -; RV32-ILP32D-NEXT: lui a4, %hi(.LCPI9_3) -; RV32-ILP32D-NEXT: lui a5, %hi(.LCPI9_4) -; RV32-ILP32D-NEXT: lui a6, %hi(.LCPI9_5) -; RV32-ILP32D-NEXT: lui a7, %hi(.LCPI9_6) +; RV32-ILP32D-NEXT: lui a1, %hi(.LCPI9_1) ; RV32-ILP32D-NEXT: fld fa0, %lo(.LCPI9_0)(a0) -; RV32-ILP32D-NEXT: lui t0, %hi(.LCPI9_7) -; RV32-ILP32D-NEXT: fld fa1, %lo(.LCPI9_1)(a2) +; RV32-ILP32D-NEXT: fld fa1, %lo(.LCPI9_1)(a1) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI9_2) +; RV32-ILP32D-NEXT: lui a1, %hi(.LCPI9_3) +; RV32-ILP32D-NEXT: fld fa2, %lo(.LCPI9_2)(a0) +; RV32-ILP32D-NEXT: fld fa3, %lo(.LCPI9_3)(a1) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI9_4) +; RV32-ILP32D-NEXT: lui a1, %hi(.LCPI9_5) +; RV32-ILP32D-NEXT: fld 
fa4, %lo(.LCPI9_4)(a0) +; RV32-ILP32D-NEXT: fld fa5, %lo(.LCPI9_5)(a1) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI9_6) +; RV32-ILP32D-NEXT: lui a1, %hi(.LCPI9_7) +; RV32-ILP32D-NEXT: fld fa6, %lo(.LCPI9_6)(a0) +; RV32-ILP32D-NEXT: fld fa7, %lo(.LCPI9_7)(a1) +; RV32-ILP32D-NEXT: lui a1, 262816 ; RV32-ILP32D-NEXT: li a0, 1 -; RV32-ILP32D-NEXT: fld fa2, %lo(.LCPI9_2)(a3) -; RV32-ILP32D-NEXT: fld fa3, %lo(.LCPI9_3)(a4) -; RV32-ILP32D-NEXT: fld fa4, %lo(.LCPI9_4)(a5) -; RV32-ILP32D-NEXT: fld fa5, %lo(.LCPI9_5)(a6) -; RV32-ILP32D-NEXT: fld fa6, %lo(.LCPI9_6)(a7) -; RV32-ILP32D-NEXT: fld fa7, %lo(.LCPI9_7)(t0) ; RV32-ILP32D-NEXT: li a2, 3 ; RV32-ILP32D-NEXT: li a4, 5 ; RV32-ILP32D-NEXT: li a6, 7 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll index 807fe9e3a581e..f996ed482a048 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll @@ -590,12 +590,12 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 % ; ILP32E-FPELIM: # %bb.0: ; ILP32E-FPELIM-NEXT: lw a0, 0(a2) ; ILP32E-FPELIM-NEXT: lw a1, 12(sp) -; ILP32E-FPELIM-NEXT: lw a2, 4(sp) -; ILP32E-FPELIM-NEXT: lw a3, 8(sp) +; ILP32E-FPELIM-NEXT: lw a2, 8(sp) +; ILP32E-FPELIM-NEXT: lw a3, 4(sp) ; ILP32E-FPELIM-NEXT: lw a4, 24(sp) ; ILP32E-FPELIM-NEXT: lw a5, 20(sp) -; ILP32E-FPELIM-NEXT: add a0, a0, a2 -; ILP32E-FPELIM-NEXT: add a1, a3, a1 +; ILP32E-FPELIM-NEXT: add a0, a0, a3 +; ILP32E-FPELIM-NEXT: add a1, a2, a1 ; ILP32E-FPELIM-NEXT: add a0, a0, a1 ; ILP32E-FPELIM-NEXT: add a4, a5, a4 ; ILP32E-FPELIM-NEXT: add a0, a0, a4 @@ -613,12 +613,12 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 % ; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-NEXT: lw a0, 0(a2) ; ILP32E-WITHFP-NEXT: lw a1, 12(s0) -; ILP32E-WITHFP-NEXT: lw a2, 4(s0) -; ILP32E-WITHFP-NEXT: lw a3, 8(s0) +; ILP32E-WITHFP-NEXT: lw a2, 8(s0) +; ILP32E-WITHFP-NEXT: lw a3, 4(s0) ; 
ILP32E-WITHFP-NEXT: lw a4, 24(s0) ; ILP32E-WITHFP-NEXT: lw a5, 20(s0) -; ILP32E-WITHFP-NEXT: add a0, a0, a2 -; ILP32E-WITHFP-NEXT: add a1, a3, a1 +; ILP32E-WITHFP-NEXT: add a0, a0, a3 +; ILP32E-WITHFP-NEXT: add a1, a2, a1 ; ILP32E-WITHFP-NEXT: add a0, a0, a1 ; ILP32E-WITHFP-NEXT: add a4, a5, a4 ; ILP32E-WITHFP-NEXT: add a0, a0, a4 @@ -635,12 +635,12 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 % ; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 0(a2) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 12(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a2, 4(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a2, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 4(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 24(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 20(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a2 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a1, a3, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a3 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a1, a2, a1 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a1 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a4, a5, a4 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a4 @@ -656,12 +656,12 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 % ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 0(a2) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 12(s0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a2, 4(s0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 8(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a2, 8(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 4(s0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 24(s0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 20(s0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a2 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a1, a3, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a3 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a1, a2, a1 ; 
ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a1 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a4, a5, a4 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a4 @@ -694,39 +694,39 @@ define void @caller_aligned_stack() { ; ILP32E-FPELIM-NEXT: addi s0, sp, 64 ; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-FPELIM-NEXT: andi sp, sp, -16 -; ILP32E-FPELIM-NEXT: li a3, 18 -; ILP32E-FPELIM-NEXT: li a4, 17 -; ILP32E-FPELIM-NEXT: li a5, 16 -; ILP32E-FPELIM-NEXT: lui a6, 262236 -; ILP32E-FPELIM-NEXT: lui a7, 377487 -; ILP32E-FPELIM-NEXT: li t0, 15 -; ILP32E-FPELIM-NEXT: li t1, 14 -; ILP32E-FPELIM-NEXT: li t2, 4 -; ILP32E-FPELIM-NEXT: lui t3, 262153 -; ILP32E-FPELIM-NEXT: lui t4, 545260 -; ILP32E-FPELIM-NEXT: lui t5, 964690 -; ILP32E-FPELIM-NEXT: lui t6, 335544 -; ILP32E-FPELIM-NEXT: lui s2, 688509 +; ILP32E-FPELIM-NEXT: li a0, 18 +; ILP32E-FPELIM-NEXT: li a1, 17 +; ILP32E-FPELIM-NEXT: lui a2, 262236 +; ILP32E-FPELIM-NEXT: li a3, 16 +; ILP32E-FPELIM-NEXT: addi a2, a2, 655 +; ILP32E-FPELIM-NEXT: lui a4, 377487 +; ILP32E-FPELIM-NEXT: sw a2, 16(sp) +; ILP32E-FPELIM-NEXT: sw a3, 20(sp) +; ILP32E-FPELIM-NEXT: sw a1, 24(sp) +; ILP32E-FPELIM-NEXT: sw a0, 28(sp) +; ILP32E-FPELIM-NEXT: addi a0, a4, 1475 +; ILP32E-FPELIM-NEXT: li a1, 15 +; ILP32E-FPELIM-NEXT: li a2, 4 +; ILP32E-FPELIM-NEXT: li a3, 14 +; ILP32E-FPELIM-NEXT: sw a2, 0(sp) +; ILP32E-FPELIM-NEXT: sw a3, 4(sp) +; ILP32E-FPELIM-NEXT: sw a1, 8(sp) +; ILP32E-FPELIM-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-NEXT: lui a0, 262153 +; ILP32E-FPELIM-NEXT: addi a6, a0, 491 +; ILP32E-FPELIM-NEXT: lui a0, 545260 +; ILP32E-FPELIM-NEXT: addi a7, a0, -1967 +; ILP32E-FPELIM-NEXT: lui a0, 964690 +; ILP32E-FPELIM-NEXT: addi t0, a0, -328 +; ILP32E-FPELIM-NEXT: lui a0, 335544 +; ILP32E-FPELIM-NEXT: addi t1, a0, 1311 +; ILP32E-FPELIM-NEXT: lui a5, 688509 +; ILP32E-FPELIM-NEXT: addi a5, a5, -2048 ; ILP32E-FPELIM-NEXT: li a0, 1 ; ILP32E-FPELIM-NEXT: li a1, 11 ; ILP32E-FPELIM-NEXT: addi a2, sp, 32 -; ILP32E-FPELIM-NEXT: addi a6, a6, 655 -; ILP32E-FPELIM-NEXT: sw 
a6, 16(sp) -; ILP32E-FPELIM-NEXT: sw a5, 20(sp) -; ILP32E-FPELIM-NEXT: sw a4, 24(sp) -; ILP32E-FPELIM-NEXT: sw a3, 28(sp) ; ILP32E-FPELIM-NEXT: li a3, 12 -; ILP32E-FPELIM-NEXT: addi a4, a7, 1475 -; ILP32E-FPELIM-NEXT: sw t2, 0(sp) -; ILP32E-FPELIM-NEXT: sw t1, 4(sp) -; ILP32E-FPELIM-NEXT: sw t0, 8(sp) -; ILP32E-FPELIM-NEXT: sw a4, 12(sp) ; ILP32E-FPELIM-NEXT: li a4, 13 -; ILP32E-FPELIM-NEXT: addi a6, t3, 491 -; ILP32E-FPELIM-NEXT: addi a7, t4, -1967 -; ILP32E-FPELIM-NEXT: addi t0, t5, -328 -; ILP32E-FPELIM-NEXT: addi t1, t6, 1311 -; ILP32E-FPELIM-NEXT: addi a5, s2, -2048 ; ILP32E-FPELIM-NEXT: sw t1, 32(sp) ; ILP32E-FPELIM-NEXT: sw t0, 36(sp) ; ILP32E-FPELIM-NEXT: sw a7, 40(sp) @@ -753,39 +753,39 @@ define void @caller_aligned_stack() { ; ILP32E-WITHFP-NEXT: addi s0, sp, 64 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-NEXT: andi sp, sp, -16 -; ILP32E-WITHFP-NEXT: li a3, 18 -; ILP32E-WITHFP-NEXT: li a4, 17 -; ILP32E-WITHFP-NEXT: li a5, 16 -; ILP32E-WITHFP-NEXT: lui a6, 262236 -; ILP32E-WITHFP-NEXT: lui a7, 377487 -; ILP32E-WITHFP-NEXT: li t0, 15 -; ILP32E-WITHFP-NEXT: li t1, 14 -; ILP32E-WITHFP-NEXT: li t2, 4 -; ILP32E-WITHFP-NEXT: lui t3, 262153 -; ILP32E-WITHFP-NEXT: lui t4, 545260 -; ILP32E-WITHFP-NEXT: lui t5, 964690 -; ILP32E-WITHFP-NEXT: lui t6, 335544 -; ILP32E-WITHFP-NEXT: lui s2, 688509 +; ILP32E-WITHFP-NEXT: li a0, 18 +; ILP32E-WITHFP-NEXT: li a1, 17 +; ILP32E-WITHFP-NEXT: lui a2, 262236 +; ILP32E-WITHFP-NEXT: li a3, 16 +; ILP32E-WITHFP-NEXT: addi a2, a2, 655 +; ILP32E-WITHFP-NEXT: lui a4, 377487 +; ILP32E-WITHFP-NEXT: sw a2, 16(sp) +; ILP32E-WITHFP-NEXT: sw a3, 20(sp) +; ILP32E-WITHFP-NEXT: sw a1, 24(sp) +; ILP32E-WITHFP-NEXT: sw a0, 28(sp) +; ILP32E-WITHFP-NEXT: addi a0, a4, 1475 +; ILP32E-WITHFP-NEXT: li a1, 15 +; ILP32E-WITHFP-NEXT: li a2, 4 +; ILP32E-WITHFP-NEXT: li a3, 14 +; ILP32E-WITHFP-NEXT: sw a2, 0(sp) +; ILP32E-WITHFP-NEXT: sw a3, 4(sp) +; ILP32E-WITHFP-NEXT: sw a1, 8(sp) +; ILP32E-WITHFP-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-NEXT: 
lui a0, 262153 +; ILP32E-WITHFP-NEXT: addi a6, a0, 491 +; ILP32E-WITHFP-NEXT: lui a0, 545260 +; ILP32E-WITHFP-NEXT: addi a7, a0, -1967 +; ILP32E-WITHFP-NEXT: lui a0, 964690 +; ILP32E-WITHFP-NEXT: addi t0, a0, -328 +; ILP32E-WITHFP-NEXT: lui a0, 335544 +; ILP32E-WITHFP-NEXT: addi t1, a0, 1311 +; ILP32E-WITHFP-NEXT: lui a5, 688509 +; ILP32E-WITHFP-NEXT: addi a5, a5, -2048 ; ILP32E-WITHFP-NEXT: li a0, 1 ; ILP32E-WITHFP-NEXT: li a1, 11 ; ILP32E-WITHFP-NEXT: addi a2, sp, 32 -; ILP32E-WITHFP-NEXT: addi a6, a6, 655 -; ILP32E-WITHFP-NEXT: sw a6, 16(sp) -; ILP32E-WITHFP-NEXT: sw a5, 20(sp) -; ILP32E-WITHFP-NEXT: sw a4, 24(sp) -; ILP32E-WITHFP-NEXT: sw a3, 28(sp) ; ILP32E-WITHFP-NEXT: li a3, 12 -; ILP32E-WITHFP-NEXT: addi a4, a7, 1475 -; ILP32E-WITHFP-NEXT: sw t2, 0(sp) -; ILP32E-WITHFP-NEXT: sw t1, 4(sp) -; ILP32E-WITHFP-NEXT: sw t0, 8(sp) -; ILP32E-WITHFP-NEXT: sw a4, 12(sp) ; ILP32E-WITHFP-NEXT: li a4, 13 -; ILP32E-WITHFP-NEXT: addi a6, t3, 491 -; ILP32E-WITHFP-NEXT: addi a7, t4, -1967 -; ILP32E-WITHFP-NEXT: addi t0, t5, -328 -; ILP32E-WITHFP-NEXT: addi t1, t6, 1311 -; ILP32E-WITHFP-NEXT: addi a5, s2, -2048 ; ILP32E-WITHFP-NEXT: sw t1, 32(sp) ; ILP32E-WITHFP-NEXT: sw t0, 36(sp) ; ILP32E-WITHFP-NEXT: sw a7, 40(sp) @@ -812,39 +812,39 @@ define void @caller_aligned_stack() { ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 64 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 18 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 17 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 16 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a6, 262236 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a7, 377487 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li t0, 15 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li t1, 14 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li t2, 4 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui t3, 262153 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui t4, 545260 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui t5, 964690 -; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui t6, 335544 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui s2, 688509 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 18 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 17 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a2, 262236 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a2, a2, 655 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a4, 377487 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 16(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 20(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 24(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 28(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a4, 1475 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 15 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 14 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 262153 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a6, a0, 491 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 545260 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a7, a0, -1967 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 964690 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi t0, a0, -328 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 335544 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi t1, a0, 1311 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a5, 688509 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a5, a5, -2048 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 11 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a2, sp, 32 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a6, a6, 655 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a6, 16(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a5, 20(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a4, 24(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 28(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 12 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a4, a7, 1475 
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw t2, 0(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw t1, 4(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw t0, 8(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a4, 12(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 13 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a6, t3, 491 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a7, t4, -1967 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi t0, t5, -328 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi t1, t6, 1311 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a5, s2, -2048 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw t1, 32(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw t0, 36(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a7, 40(sp) @@ -867,39 +867,39 @@ define void @caller_aligned_stack() { ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 64 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 18 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 17 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 16 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a6, 262236 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a7, 377487 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li t0, 15 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li t1, 14 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li t2, 4 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui t3, 262153 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui t4, 545260 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui t5, 964690 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui t6, 335544 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui s2, 688509 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 18 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 17 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a2, 262236 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a2, a2, 655 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a4, 377487 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, 16(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, 20(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 24(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 
28(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a4, 1475 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 15 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 14 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, 4(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 8(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 262153 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a6, a0, 491 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 545260 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a7, a0, -1967 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 964690 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi t0, a0, -328 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 335544 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi t1, a0, 1311 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a5, 688509 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a5, a5, -2048 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 11 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a2, sp, 32 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a6, a6, 655 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a6, 16(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a5, 20(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a4, 24(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, 28(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 12 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a4, a7, 1475 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw t2, 0(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw t1, 4(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw t0, 8(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a4, 12(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 13 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a6, t3, 491 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a7, t4, -1967 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi t0, t5, -328 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi t1, t6, 1311 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a5, s2, -2048 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw t1, 32(sp) ; 
ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw t0, 36(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a7, 40(sp) @@ -1149,19 +1149,19 @@ define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i32 %e, i32 %f, i ; ILP32E-FPELIM-LABEL: callee_many_scalars: ; ILP32E-FPELIM: # %bb.0: ; ILP32E-FPELIM-NEXT: lw a6, 12(sp) -; ILP32E-FPELIM-NEXT: lw a7, 0(sp) -; ILP32E-FPELIM-NEXT: lw t0, 4(sp) -; ILP32E-FPELIM-NEXT: lw t1, 8(sp) +; ILP32E-FPELIM-NEXT: lw a7, 8(sp) +; ILP32E-FPELIM-NEXT: lw t0, 0(sp) +; ILP32E-FPELIM-NEXT: lw t1, 4(sp) ; ILP32E-FPELIM-NEXT: zext.b a0, a0 ; ILP32E-FPELIM-NEXT: slli a1, a1, 16 ; ILP32E-FPELIM-NEXT: srli a1, a1, 16 ; ILP32E-FPELIM-NEXT: add a0, a0, a2 ; ILP32E-FPELIM-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-NEXT: xor a1, a4, a7 +; ILP32E-FPELIM-NEXT: xor a2, a3, t1 ; ILP32E-FPELIM-NEXT: add a0, a0, a5 -; ILP32E-FPELIM-NEXT: xor a1, a4, t1 -; ILP32E-FPELIM-NEXT: xor a2, a3, t0 -; ILP32E-FPELIM-NEXT: add a0, a0, a7 ; ILP32E-FPELIM-NEXT: or a1, a2, a1 +; ILP32E-FPELIM-NEXT: add a0, a0, t0 ; ILP32E-FPELIM-NEXT: seqz a1, a1 ; ILP32E-FPELIM-NEXT: add a0, a0, a6 ; ILP32E-FPELIM-NEXT: add a0, a1, a0 @@ -1178,19 +1178,19 @@ define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i32 %e, i32 %f, i ; ILP32E-WITHFP-NEXT: addi s0, sp, 8 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-NEXT: lw a6, 12(s0) -; ILP32E-WITHFP-NEXT: lw a7, 0(s0) -; ILP32E-WITHFP-NEXT: lw t0, 4(s0) -; ILP32E-WITHFP-NEXT: lw t1, 8(s0) +; ILP32E-WITHFP-NEXT: lw a7, 8(s0) +; ILP32E-WITHFP-NEXT: lw t0, 0(s0) +; ILP32E-WITHFP-NEXT: lw t1, 4(s0) ; ILP32E-WITHFP-NEXT: zext.b a0, a0 ; ILP32E-WITHFP-NEXT: slli a1, a1, 16 ; ILP32E-WITHFP-NEXT: srli a1, a1, 16 ; ILP32E-WITHFP-NEXT: add a0, a0, a2 ; ILP32E-WITHFP-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-NEXT: xor a1, a4, a7 +; ILP32E-WITHFP-NEXT: xor a2, a3, t1 ; ILP32E-WITHFP-NEXT: add a0, a0, a5 -; ILP32E-WITHFP-NEXT: xor a1, a4, t1 -; ILP32E-WITHFP-NEXT: xor a2, a3, t0 -; ILP32E-WITHFP-NEXT: add a0, a0, a7 ; ILP32E-WITHFP-NEXT: or a1, a2, 
a1 +; ILP32E-WITHFP-NEXT: add a0, a0, t0 ; ILP32E-WITHFP-NEXT: seqz a1, a1 ; ILP32E-WITHFP-NEXT: add a0, a0, a6 ; ILP32E-WITHFP-NEXT: add a0, a1, a0 @@ -1206,19 +1206,19 @@ define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i32 %e, i32 %f, i ; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_many_scalars: ; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a6, 12(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a7, 0(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw t0, 4(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw t1, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a7, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw t0, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw t1, 4(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: zext.b a0, a0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: slli a1, a1, 16 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: srli a1, a1, 16 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a2 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a1, a4, a7 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a2, a3, t1 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a5 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a1, a4, t1 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a2, a3, t0 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a7 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a1, a2, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, t0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: seqz a1, a1 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a6 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a1, a0 @@ -1233,19 +1233,19 @@ define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i32 %e, i32 %f, i ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a6, 12(s0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a7, 0(s0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw t0, 4(s0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw t1, 8(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a7, 8(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw t0, 0(s0) 
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw t1, 4(s0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: zext.b a0, a0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: slli a1, a1, 16 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: srli a1, a1, 16 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a2 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a1, a4, a7 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a2, a3, t1 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a5 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a1, a4, t1 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a2, a3, t0 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a7 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a1, a2, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, t0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: seqz a1, a1 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a6 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a1, a0 @@ -1390,16 +1390,16 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) { ; ILP32E-FPELIM-NEXT: lw a3, 4(a1) ; ILP32E-FPELIM-NEXT: lw a4, 8(a1) ; ILP32E-FPELIM-NEXT: lw a1, 12(a1) -; ILP32E-FPELIM-NEXT: lw a5, 12(a0) -; ILP32E-FPELIM-NEXT: lw a6, 4(a0) -; ILP32E-FPELIM-NEXT: lw a7, 8(a0) -; ILP32E-FPELIM-NEXT: lw a0, 0(a0) -; ILP32E-FPELIM-NEXT: xor a1, a5, a1 -; ILP32E-FPELIM-NEXT: xor a3, a6, a3 -; ILP32E-FPELIM-NEXT: xor a4, a7, a4 -; ILP32E-FPELIM-NEXT: xor a0, a0, a2 +; ILP32E-FPELIM-NEXT: lw a5, 4(a0) +; ILP32E-FPELIM-NEXT: lw a6, 12(a0) +; ILP32E-FPELIM-NEXT: lw a7, 0(a0) +; ILP32E-FPELIM-NEXT: lw a0, 8(a0) +; ILP32E-FPELIM-NEXT: xor a1, a6, a1 +; ILP32E-FPELIM-NEXT: xor a3, a5, a3 +; ILP32E-FPELIM-NEXT: xor a0, a0, a4 +; ILP32E-FPELIM-NEXT: xor a2, a7, a2 ; ILP32E-FPELIM-NEXT: or a1, a3, a1 -; ILP32E-FPELIM-NEXT: or a0, a0, a4 +; ILP32E-FPELIM-NEXT: or a0, a2, a0 ; ILP32E-FPELIM-NEXT: or a0, a0, a1 ; ILP32E-FPELIM-NEXT: seqz a0, a0 ; ILP32E-FPELIM-NEXT: ret @@ -1418,16 +1418,16 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) { ; ILP32E-WITHFP-NEXT: lw a3, 4(a1) ; ILP32E-WITHFP-NEXT: lw a4, 8(a1) ; 
ILP32E-WITHFP-NEXT: lw a1, 12(a1) -; ILP32E-WITHFP-NEXT: lw a5, 12(a0) -; ILP32E-WITHFP-NEXT: lw a6, 4(a0) -; ILP32E-WITHFP-NEXT: lw a7, 8(a0) -; ILP32E-WITHFP-NEXT: lw a0, 0(a0) -; ILP32E-WITHFP-NEXT: xor a1, a5, a1 -; ILP32E-WITHFP-NEXT: xor a3, a6, a3 -; ILP32E-WITHFP-NEXT: xor a4, a7, a4 -; ILP32E-WITHFP-NEXT: xor a0, a0, a2 +; ILP32E-WITHFP-NEXT: lw a5, 4(a0) +; ILP32E-WITHFP-NEXT: lw a6, 12(a0) +; ILP32E-WITHFP-NEXT: lw a7, 0(a0) +; ILP32E-WITHFP-NEXT: lw a0, 8(a0) +; ILP32E-WITHFP-NEXT: xor a1, a6, a1 +; ILP32E-WITHFP-NEXT: xor a3, a5, a3 +; ILP32E-WITHFP-NEXT: xor a0, a0, a4 +; ILP32E-WITHFP-NEXT: xor a2, a7, a2 ; ILP32E-WITHFP-NEXT: or a1, a3, a1 -; ILP32E-WITHFP-NEXT: or a0, a0, a4 +; ILP32E-WITHFP-NEXT: or a0, a2, a0 ; ILP32E-WITHFP-NEXT: or a0, a0, a1 ; ILP32E-WITHFP-NEXT: seqz a0, a0 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa sp, 8 @@ -1445,16 +1445,16 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) { ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 4(a1) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 8(a1) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 12(a1) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 12(a0) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a6, 4(a0) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a7, 8(a0) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 0(a0) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a1, a5, a1 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a3, a6, a3 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a4, a7, a4 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a0, a0, a2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 4(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a6, 12(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a7, 0(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 8(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a1, a6, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a3, a5, a3 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a0, a0, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a2, a7, a2 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a1, a3, a1 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a0, a4 +; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a2, a0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a0, a1 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: seqz a0, a0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret @@ -1471,16 +1471,16 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) { ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 4(a1) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 8(a1) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 12(a1) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 12(a0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a6, 4(a0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a7, 8(a0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 0(a0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a1, a5, a1 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a3, a6, a3 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a4, a7, a4 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a0, a0, a2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 4(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a6, 12(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a7, 0(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 8(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a1, a6, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a3, a5, a3 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a0, a0, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a2, a7, a2 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a1, a3, a1 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a0, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a2, a0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a0, a1 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: seqz a0, a0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa sp, 8 @@ -1503,13 +1503,13 @@ define i32 @caller_large_scalars() { ; ILP32E-FPELIM-NEXT: addi s0, sp, 48 ; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-FPELIM-NEXT: andi sp, sp, -16 -; ILP32E-FPELIM-NEXT: lui a1, 524272 -; ILP32E-FPELIM-NEXT: li a2, 1 -; ILP32E-FPELIM-NEXT: addi a0, sp, 24 +; ILP32E-FPELIM-NEXT: lui a0, 524272 ; ILP32E-FPELIM-NEXT: sw zero, 0(sp) ; ILP32E-FPELIM-NEXT: sw zero, 4(sp) ; ILP32E-FPELIM-NEXT: sw zero, 8(sp) -; ILP32E-FPELIM-NEXT: sw a1, 12(sp) +; 
ILP32E-FPELIM-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-NEXT: li a2, 1 +; ILP32E-FPELIM-NEXT: addi a0, sp, 24 ; ILP32E-FPELIM-NEXT: mv a1, sp ; ILP32E-FPELIM-NEXT: sw a2, 24(sp) ; ILP32E-FPELIM-NEXT: sw zero, 28(sp) @@ -1537,13 +1537,13 @@ define i32 @caller_large_scalars() { ; ILP32E-WITHFP-NEXT: addi s0, sp, 48 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-NEXT: andi sp, sp, -16 -; ILP32E-WITHFP-NEXT: lui a1, 524272 -; ILP32E-WITHFP-NEXT: li a2, 1 -; ILP32E-WITHFP-NEXT: addi a0, sp, 24 +; ILP32E-WITHFP-NEXT: lui a0, 524272 ; ILP32E-WITHFP-NEXT: sw zero, 0(sp) ; ILP32E-WITHFP-NEXT: sw zero, 4(sp) ; ILP32E-WITHFP-NEXT: sw zero, 8(sp) -; ILP32E-WITHFP-NEXT: sw a1, 12(sp) +; ILP32E-WITHFP-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-NEXT: li a2, 1 +; ILP32E-WITHFP-NEXT: addi a0, sp, 24 ; ILP32E-WITHFP-NEXT: mv a1, sp ; ILP32E-WITHFP-NEXT: sw a2, 24(sp) ; ILP32E-WITHFP-NEXT: sw zero, 28(sp) @@ -1571,13 +1571,13 @@ define i32 @caller_large_scalars() { ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 48 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a1, 524272 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 1 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 24 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 524272 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 0(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 4(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 24 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a1, sp ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 24(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 28(sp) @@ -1601,13 +1601,13 @@ define i32 @caller_large_scalars() { ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 48 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; 
ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a1, 524272 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 1 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 524272 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 0(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 12(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 24 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a1, sp ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, 24(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 28(sp) @@ -1636,16 +1636,16 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, ; ILP32E-FPELIM-NEXT: lw a3, 4(a0) ; ILP32E-FPELIM-NEXT: lw a4, 8(a0) ; ILP32E-FPELIM-NEXT: lw a0, 12(a0) -; ILP32E-FPELIM-NEXT: lw a5, 12(a1) -; ILP32E-FPELIM-NEXT: lw a6, 4(a1) -; ILP32E-FPELIM-NEXT: lw a7, 8(a1) -; ILP32E-FPELIM-NEXT: lw a1, 0(a1) -; ILP32E-FPELIM-NEXT: xor a0, a5, a0 -; ILP32E-FPELIM-NEXT: xor a3, a6, a3 -; ILP32E-FPELIM-NEXT: xor a4, a7, a4 -; ILP32E-FPELIM-NEXT: xor a1, a1, a2 +; ILP32E-FPELIM-NEXT: lw a5, 4(a1) +; ILP32E-FPELIM-NEXT: lw a6, 12(a1) +; ILP32E-FPELIM-NEXT: lw a7, 0(a1) +; ILP32E-FPELIM-NEXT: lw a1, 8(a1) +; ILP32E-FPELIM-NEXT: xor a0, a6, a0 +; ILP32E-FPELIM-NEXT: xor a3, a5, a3 +; ILP32E-FPELIM-NEXT: xor a1, a1, a4 +; ILP32E-FPELIM-NEXT: xor a2, a7, a2 ; ILP32E-FPELIM-NEXT: or a0, a3, a0 -; ILP32E-FPELIM-NEXT: or a1, a1, a4 +; ILP32E-FPELIM-NEXT: or a1, a2, a1 ; ILP32E-FPELIM-NEXT: or a0, a1, a0 ; ILP32E-FPELIM-NEXT: seqz a0, a0 ; ILP32E-FPELIM-NEXT: ret @@ -1666,16 +1666,16 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, ; ILP32E-WITHFP-NEXT: lw a3, 4(a0) ; ILP32E-WITHFP-NEXT: lw a4, 8(a0) ; ILP32E-WITHFP-NEXT: lw a0, 12(a0) -; ILP32E-WITHFP-NEXT: lw a5, 12(a1) 
-; ILP32E-WITHFP-NEXT: lw a6, 4(a1) -; ILP32E-WITHFP-NEXT: lw a7, 8(a1) -; ILP32E-WITHFP-NEXT: lw a1, 0(a1) -; ILP32E-WITHFP-NEXT: xor a0, a5, a0 -; ILP32E-WITHFP-NEXT: xor a3, a6, a3 -; ILP32E-WITHFP-NEXT: xor a4, a7, a4 -; ILP32E-WITHFP-NEXT: xor a1, a1, a2 +; ILP32E-WITHFP-NEXT: lw a5, 4(a1) +; ILP32E-WITHFP-NEXT: lw a6, 12(a1) +; ILP32E-WITHFP-NEXT: lw a7, 0(a1) +; ILP32E-WITHFP-NEXT: lw a1, 8(a1) +; ILP32E-WITHFP-NEXT: xor a0, a6, a0 +; ILP32E-WITHFP-NEXT: xor a3, a5, a3 +; ILP32E-WITHFP-NEXT: xor a1, a1, a4 +; ILP32E-WITHFP-NEXT: xor a2, a7, a2 ; ILP32E-WITHFP-NEXT: or a0, a3, a0 -; ILP32E-WITHFP-NEXT: or a1, a1, a4 +; ILP32E-WITHFP-NEXT: or a1, a2, a1 ; ILP32E-WITHFP-NEXT: or a0, a1, a0 ; ILP32E-WITHFP-NEXT: seqz a0, a0 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa sp, 8 @@ -1695,16 +1695,16 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 4(a0) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 8(a0) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 12(a0) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 12(a1) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a6, 4(a1) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a7, 8(a1) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 0(a1) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a0, a5, a0 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a3, a6, a3 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a4, a7, a4 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a1, a1, a2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 4(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a6, 12(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a7, 0(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 8(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a0, a6, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a3, a5, a3 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a1, a1, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a2, a7, a2 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a3, a0 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a1, a1, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a1, a2, a1 ; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a1, a0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: seqz a0, a0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret @@ -1723,16 +1723,16 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 4(a0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 8(a0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 12(a0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 12(a1) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a6, 4(a1) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a7, 8(a1) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 0(a1) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a0, a5, a0 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a3, a6, a3 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a4, a7, a4 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a1, a1, a2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 4(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a6, 12(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a7, 0(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 8(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a0, a6, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a3, a5, a3 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a1, a1, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a2, a7, a2 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a3, a0 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a1, a1, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a1, a2, a1 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a1, a0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: seqz a0, a0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa sp, 8 @@ -1755,27 +1755,27 @@ define i32 @caller_large_scalars_exhausted_regs() { ; ILP32E-FPELIM-NEXT: addi s0, sp, 64 ; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-FPELIM-NEXT: andi sp, sp, -16 -; ILP32E-FPELIM-NEXT: addi a4, sp, 16 -; ILP32E-FPELIM-NEXT: li a5, 9 -; ILP32E-FPELIM-NEXT: addi a6, sp, 40 -; ILP32E-FPELIM-NEXT: li a7, 7 -; ILP32E-FPELIM-NEXT: lui t0, 524272 -; ILP32E-FPELIM-NEXT: li t1, 8 +; ILP32E-FPELIM-NEXT: addi a0, sp, 16 +; ILP32E-FPELIM-NEXT: li a1, 9 +; ILP32E-FPELIM-NEXT: addi a2, sp, 40 
+; ILP32E-FPELIM-NEXT: li a3, 7 +; ILP32E-FPELIM-NEXT: sw a3, 0(sp) +; ILP32E-FPELIM-NEXT: sw a2, 4(sp) +; ILP32E-FPELIM-NEXT: sw a1, 8(sp) +; ILP32E-FPELIM-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-NEXT: lui a0, 524272 +; ILP32E-FPELIM-NEXT: sw zero, 16(sp) +; ILP32E-FPELIM-NEXT: sw zero, 20(sp) +; ILP32E-FPELIM-NEXT: sw zero, 24(sp) +; ILP32E-FPELIM-NEXT: sw a0, 28(sp) +; ILP32E-FPELIM-NEXT: li a6, 8 ; ILP32E-FPELIM-NEXT: li a0, 1 ; ILP32E-FPELIM-NEXT: li a1, 2 ; ILP32E-FPELIM-NEXT: li a2, 3 ; ILP32E-FPELIM-NEXT: li a3, 4 -; ILP32E-FPELIM-NEXT: sw a7, 0(sp) -; ILP32E-FPELIM-NEXT: sw a6, 4(sp) -; ILP32E-FPELIM-NEXT: sw a5, 8(sp) -; ILP32E-FPELIM-NEXT: sw a4, 12(sp) ; ILP32E-FPELIM-NEXT: li a4, 5 -; ILP32E-FPELIM-NEXT: sw zero, 16(sp) -; ILP32E-FPELIM-NEXT: sw zero, 20(sp) -; ILP32E-FPELIM-NEXT: sw zero, 24(sp) -; ILP32E-FPELIM-NEXT: sw t0, 28(sp) ; ILP32E-FPELIM-NEXT: li a5, 6 -; ILP32E-FPELIM-NEXT: sw t1, 40(sp) +; ILP32E-FPELIM-NEXT: sw a6, 40(sp) ; ILP32E-FPELIM-NEXT: sw zero, 44(sp) ; ILP32E-FPELIM-NEXT: sw zero, 48(sp) ; ILP32E-FPELIM-NEXT: sw zero, 52(sp) @@ -1801,27 +1801,27 @@ define i32 @caller_large_scalars_exhausted_regs() { ; ILP32E-WITHFP-NEXT: addi s0, sp, 64 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-NEXT: andi sp, sp, -16 -; ILP32E-WITHFP-NEXT: addi a4, sp, 16 -; ILP32E-WITHFP-NEXT: li a5, 9 -; ILP32E-WITHFP-NEXT: addi a6, sp, 40 -; ILP32E-WITHFP-NEXT: li a7, 7 -; ILP32E-WITHFP-NEXT: lui t0, 524272 -; ILP32E-WITHFP-NEXT: li t1, 8 +; ILP32E-WITHFP-NEXT: addi a0, sp, 16 +; ILP32E-WITHFP-NEXT: li a1, 9 +; ILP32E-WITHFP-NEXT: addi a2, sp, 40 +; ILP32E-WITHFP-NEXT: li a3, 7 +; ILP32E-WITHFP-NEXT: sw a3, 0(sp) +; ILP32E-WITHFP-NEXT: sw a2, 4(sp) +; ILP32E-WITHFP-NEXT: sw a1, 8(sp) +; ILP32E-WITHFP-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-NEXT: lui a0, 524272 +; ILP32E-WITHFP-NEXT: sw zero, 16(sp) +; ILP32E-WITHFP-NEXT: sw zero, 20(sp) +; ILP32E-WITHFP-NEXT: sw zero, 24(sp) +; ILP32E-WITHFP-NEXT: sw a0, 28(sp) +; ILP32E-WITHFP-NEXT: li a6, 8 ; 
ILP32E-WITHFP-NEXT: li a0, 1 ; ILP32E-WITHFP-NEXT: li a1, 2 ; ILP32E-WITHFP-NEXT: li a2, 3 ; ILP32E-WITHFP-NEXT: li a3, 4 -; ILP32E-WITHFP-NEXT: sw a7, 0(sp) -; ILP32E-WITHFP-NEXT: sw a6, 4(sp) -; ILP32E-WITHFP-NEXT: sw a5, 8(sp) -; ILP32E-WITHFP-NEXT: sw a4, 12(sp) ; ILP32E-WITHFP-NEXT: li a4, 5 -; ILP32E-WITHFP-NEXT: sw zero, 16(sp) -; ILP32E-WITHFP-NEXT: sw zero, 20(sp) -; ILP32E-WITHFP-NEXT: sw zero, 24(sp) -; ILP32E-WITHFP-NEXT: sw t0, 28(sp) ; ILP32E-WITHFP-NEXT: li a5, 6 -; ILP32E-WITHFP-NEXT: sw t1, 40(sp) +; ILP32E-WITHFP-NEXT: sw a6, 40(sp) ; ILP32E-WITHFP-NEXT: sw zero, 44(sp) ; ILP32E-WITHFP-NEXT: sw zero, 48(sp) ; ILP32E-WITHFP-NEXT: sw zero, 52(sp) @@ -1847,27 +1847,27 @@ define i32 @caller_large_scalars_exhausted_regs() { ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 64 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a4, sp, 16 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 9 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a6, sp, 40 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a7, 7 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui t0, 524272 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li t1, 8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 9 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a2, sp, 40 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 7 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 524272 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 16(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 20(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 24(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 28(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a6, 8 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2 ; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 3 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 4 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a7, 0(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a6, 4(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a5, 8(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a4, 12(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 5 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 16(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 20(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 24(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw t0, 28(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 6 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw t1, 40(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a6, 40(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 44(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 48(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 52(sp) @@ -1889,27 +1889,27 @@ define i32 @caller_large_scalars_exhausted_regs() { ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 64 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a4, sp, 16 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 9 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a6, sp, 40 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a7, 7 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui t0, 524272 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li t1, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 9 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a2, sp, 40 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 7 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, 4(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 8(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 524272 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 16(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 20(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 24(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 
28(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a6, 8 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 3 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 4 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a7, 0(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a6, 4(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a5, 8(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a4, 12(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 5 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 16(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 20(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 24(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw t0, 28(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 6 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw t1, 40(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a6, 40(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 44(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 48(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 52(sp) diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32f-ilp32d-common.ll index dabd2a7ce9a73..fbbdf7579f930 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32f-ilp32d-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32f-ilp32d-common.ll @@ -40,9 +40,9 @@ define i32 @caller_float_in_fpr() nounwind { define i32 @callee_float_in_fpr_exhausted_gprs(i64 %a, i64 %b, i64 %c, i64 %d, i32 %e, float %f) nounwind { ; RV32-ILP32FD-LABEL: callee_float_in_fpr_exhausted_gprs: ; RV32-ILP32FD: # %bb.0: -; RV32-ILP32FD-NEXT: lw a0, 0(sp) -; RV32-ILP32FD-NEXT: fcvt.w.s a1, fa0, rtz -; RV32-ILP32FD-NEXT: add a0, a0, a1 +; RV32-ILP32FD-NEXT: fcvt.w.s a0, fa0, rtz +; RV32-ILP32FD-NEXT: lw a1, 0(sp) +; RV32-ILP32FD-NEXT: add a0, a1, a0 ; RV32-ILP32FD-NEXT: ret %f_fptosi = fptosi float %f to i32 %1 = add i32 %e, %f_fptosi @@ -54,12 +54,12 @@ define i32 @caller_float_in_fpr_exhausted_gprs() nounwind { ; RV32-ILP32FD: # %bb.0: ; RV32-ILP32FD-NEXT: addi sp, sp, 
-16 ; RV32-ILP32FD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-ILP32FD-NEXT: lui a0, 265216 +; RV32-ILP32FD-NEXT: fmv.w.x fa0, a0 ; RV32-ILP32FD-NEXT: li a1, 5 -; RV32-ILP32FD-NEXT: lui a3, 265216 ; RV32-ILP32FD-NEXT: li a0, 1 ; RV32-ILP32FD-NEXT: li a2, 2 ; RV32-ILP32FD-NEXT: li a4, 3 -; RV32-ILP32FD-NEXT: fmv.w.x fa0, a3 ; RV32-ILP32FD-NEXT: li a6, 4 ; RV32-ILP32FD-NEXT: sw a1, 0(sp) ; RV32-ILP32FD-NEXT: li a1, 0 @@ -98,16 +98,16 @@ define i32 @caller_float_in_gpr_exhausted_fprs() nounwind { ; RV32-ILP32FD-NEXT: lui a0, 260096 ; RV32-ILP32FD-NEXT: lui a1, 262144 ; RV32-ILP32FD-NEXT: fmv.w.x fa0, a0 -; RV32-ILP32FD-NEXT: lui a0, 263168 ; RV32-ILP32FD-NEXT: fmv.w.x fa1, a1 +; RV32-ILP32FD-NEXT: lui a0, 263168 ; RV32-ILP32FD-NEXT: lui a1, 264192 ; RV32-ILP32FD-NEXT: fmv.w.x fa2, a0 -; RV32-ILP32FD-NEXT: lui a0, 264704 ; RV32-ILP32FD-NEXT: fmv.w.x fa3, a1 +; RV32-ILP32FD-NEXT: lui a0, 264704 ; RV32-ILP32FD-NEXT: lui a1, 265216 ; RV32-ILP32FD-NEXT: fmv.w.x fa4, a0 -; RV32-ILP32FD-NEXT: lui a0, 265728 ; RV32-ILP32FD-NEXT: fmv.w.x fa5, a1 +; RV32-ILP32FD-NEXT: lui a0, 265728 ; RV32-ILP32FD-NEXT: lui a1, 266240 ; RV32-ILP32FD-NEXT: fmv.w.x fa6, a0 ; RV32-ILP32FD-NEXT: fmv.w.x fa7, a1 @@ -141,26 +141,26 @@ define i32 @caller_float_on_stack_exhausted_gprs_fprs() nounwind { ; RV32-ILP32FD: # %bb.0: ; RV32-ILP32FD-NEXT: addi sp, sp, -16 ; RV32-ILP32FD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-ILP32FD-NEXT: lui a1, 267520 ; RV32-ILP32FD-NEXT: lui a0, 262144 -; RV32-ILP32FD-NEXT: lui a2, 264192 -; RV32-ILP32FD-NEXT: lui a3, 265216 -; RV32-ILP32FD-NEXT: lui a4, 266240 -; RV32-ILP32FD-NEXT: lui a5, 266496 -; RV32-ILP32FD-NEXT: lui a6, 266752 -; RV32-ILP32FD-NEXT: lui a7, 267008 +; RV32-ILP32FD-NEXT: lui a1, 264192 ; RV32-ILP32FD-NEXT: fmv.w.x fa0, a0 -; RV32-ILP32FD-NEXT: lui t0, 267264 -; RV32-ILP32FD-NEXT: fmv.w.x fa1, a2 +; RV32-ILP32FD-NEXT: fmv.w.x fa1, a1 +; RV32-ILP32FD-NEXT: lui a0, 265216 +; RV32-ILP32FD-NEXT: lui a1, 266240 +; RV32-ILP32FD-NEXT: fmv.w.x 
fa2, a0 +; RV32-ILP32FD-NEXT: fmv.w.x fa3, a1 +; RV32-ILP32FD-NEXT: lui a0, 266496 +; RV32-ILP32FD-NEXT: lui a1, 266752 +; RV32-ILP32FD-NEXT: fmv.w.x fa4, a0 +; RV32-ILP32FD-NEXT: fmv.w.x fa5, a1 +; RV32-ILP32FD-NEXT: lui a0, 267008 +; RV32-ILP32FD-NEXT: lui a1, 267264 +; RV32-ILP32FD-NEXT: fmv.w.x fa6, a0 +; RV32-ILP32FD-NEXT: fmv.w.x fa7, a1 +; RV32-ILP32FD-NEXT: lui a1, 267520 ; RV32-ILP32FD-NEXT: li a0, 1 -; RV32-ILP32FD-NEXT: fmv.w.x fa2, a3 ; RV32-ILP32FD-NEXT: li a2, 3 -; RV32-ILP32FD-NEXT: fmv.w.x fa3, a4 ; RV32-ILP32FD-NEXT: li a4, 5 -; RV32-ILP32FD-NEXT: fmv.w.x fa4, a5 -; RV32-ILP32FD-NEXT: fmv.w.x fa5, a6 -; RV32-ILP32FD-NEXT: fmv.w.x fa6, a7 -; RV32-ILP32FD-NEXT: fmv.w.x fa7, t0 ; RV32-ILP32FD-NEXT: li a6, 7 ; RV32-ILP32FD-NEXT: sw a1, 0(sp) ; RV32-ILP32FD-NEXT: li a1, 0 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll index a63dc0ef3a3a7..a55b6e650a43d 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll @@ -48,20 +48,20 @@ define i64 @caller_i128_in_regs() nounwind { define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i128 %d, i32 %e, i32 %f, i128 %g, i32 %h) nounwind { ; RV64I-LABEL: callee_many_scalars: ; RV64I: # %bb.0: -; RV64I-NEXT: lw t0, 8(sp) -; RV64I-NEXT: ld t1, 0(sp) +; RV64I-NEXT: ld t0, 0(sp) +; RV64I-NEXT: lw t1, 8(sp) ; RV64I-NEXT: zext.b a0, a0 ; RV64I-NEXT: slli a1, a1, 48 -; RV64I-NEXT: xor a3, a3, a7 ; RV64I-NEXT: srli a1, a1, 48 ; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: xor a1, a4, t0 +; RV64I-NEXT: xor a2, a3, a7 ; RV64I-NEXT: add a0, a0, a5 -; RV64I-NEXT: xor a1, a4, t1 +; RV64I-NEXT: or a1, a2, a1 ; RV64I-NEXT: add a0, a0, a6 -; RV64I-NEXT: or a1, a3, a1 ; RV64I-NEXT: seqz a1, a1 -; RV64I-NEXT: add a0, a0, t0 +; RV64I-NEXT: add a0, a0, t1 ; RV64I-NEXT: addw a0, a1, a0 ; RV64I-NEXT: ret %a_ext = zext i8 %a to 
i32 @@ -110,16 +110,16 @@ define i64 @callee_large_scalars(i256 %a, i256 %b) nounwind { ; RV64I-NEXT: ld a3, 8(a1) ; RV64I-NEXT: ld a4, 16(a1) ; RV64I-NEXT: ld a1, 24(a1) -; RV64I-NEXT: ld a5, 24(a0) -; RV64I-NEXT: ld a6, 8(a0) -; RV64I-NEXT: ld a7, 16(a0) -; RV64I-NEXT: ld a0, 0(a0) -; RV64I-NEXT: xor a1, a5, a1 -; RV64I-NEXT: xor a3, a6, a3 -; RV64I-NEXT: xor a4, a7, a4 -; RV64I-NEXT: xor a0, a0, a2 +; RV64I-NEXT: ld a5, 8(a0) +; RV64I-NEXT: ld a6, 24(a0) +; RV64I-NEXT: ld a7, 0(a0) +; RV64I-NEXT: ld a0, 16(a0) +; RV64I-NEXT: xor a1, a6, a1 +; RV64I-NEXT: xor a3, a5, a3 +; RV64I-NEXT: xor a0, a0, a4 +; RV64I-NEXT: xor a2, a7, a2 ; RV64I-NEXT: or a1, a3, a1 -; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: seqz a0, a0 ; RV64I-NEXT: ret @@ -133,15 +133,15 @@ define i64 @caller_large_scalars() nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -80 ; RV64I-NEXT: sd ra, 72(sp) # 8-byte Folded Spill -; RV64I-NEXT: li a2, 2 -; RV64I-NEXT: li a3, 1 -; RV64I-NEXT: addi a0, sp, 32 -; RV64I-NEXT: mv a1, sp -; RV64I-NEXT: sd a2, 0(sp) +; RV64I-NEXT: li a0, 2 +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: sd zero, 8(sp) ; RV64I-NEXT: sd zero, 16(sp) ; RV64I-NEXT: sd zero, 24(sp) -; RV64I-NEXT: sd a3, 32(sp) +; RV64I-NEXT: li a2, 1 +; RV64I-NEXT: addi a0, sp, 32 +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: sd a2, 32(sp) ; RV64I-NEXT: sd zero, 40(sp) ; RV64I-NEXT: sd zero, 48(sp) ; RV64I-NEXT: sd zero, 56(sp) @@ -165,16 +165,16 @@ define i64 @callee_large_scalars_exhausted_regs(i64 %a, i64 %b, i64 %c, i64 %d, ; RV64I-NEXT: ld a2, 8(a7) ; RV64I-NEXT: ld a3, 16(a7) ; RV64I-NEXT: ld a4, 24(a7) -; RV64I-NEXT: ld a5, 24(a0) -; RV64I-NEXT: ld a6, 8(a0) -; RV64I-NEXT: ld a7, 16(a0) -; RV64I-NEXT: ld a0, 0(a0) -; RV64I-NEXT: xor a4, a4, a5 -; RV64I-NEXT: xor a2, a2, a6 -; RV64I-NEXT: xor a3, a3, a7 -; RV64I-NEXT: xor a0, a1, a0 +; RV64I-NEXT: ld a5, 8(a0) +; RV64I-NEXT: ld a6, 24(a0) +; RV64I-NEXT: ld a7, 0(a0) +; RV64I-NEXT: ld a0, 16(a0) 
+; RV64I-NEXT: xor a4, a4, a6 +; RV64I-NEXT: xor a2, a2, a5 +; RV64I-NEXT: xor a0, a3, a0 +; RV64I-NEXT: xor a1, a1, a7 ; RV64I-NEXT: or a2, a2, a4 -; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: seqz a0, a0 ; RV64I-NEXT: ret @@ -188,10 +188,16 @@ define i64 @caller_large_scalars_exhausted_regs() nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -96 ; RV64I-NEXT: sd ra, 88(sp) # 8-byte Folded Spill -; RV64I-NEXT: addi a7, sp, 16 -; RV64I-NEXT: li t0, 9 -; RV64I-NEXT: li t1, 10 -; RV64I-NEXT: li t2, 8 +; RV64I-NEXT: addi a0, sp, 16 +; RV64I-NEXT: li a1, 9 +; RV64I-NEXT: li a2, 10 +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: sd a0, 8(sp) +; RV64I-NEXT: sd a2, 16(sp) +; RV64I-NEXT: sd zero, 24(sp) +; RV64I-NEXT: sd zero, 32(sp) +; RV64I-NEXT: sd zero, 40(sp) +; RV64I-NEXT: li t0, 8 ; RV64I-NEXT: li a0, 1 ; RV64I-NEXT: li a1, 2 ; RV64I-NEXT: li a2, 3 @@ -199,14 +205,8 @@ define i64 @caller_large_scalars_exhausted_regs() nounwind { ; RV64I-NEXT: li a4, 5 ; RV64I-NEXT: li a5, 6 ; RV64I-NEXT: li a6, 7 -; RV64I-NEXT: sd t0, 0(sp) -; RV64I-NEXT: sd a7, 8(sp) ; RV64I-NEXT: addi a7, sp, 48 -; RV64I-NEXT: sd t1, 16(sp) -; RV64I-NEXT: sd zero, 24(sp) -; RV64I-NEXT: sd zero, 32(sp) -; RV64I-NEXT: sd zero, 40(sp) -; RV64I-NEXT: sd t2, 48(sp) +; RV64I-NEXT: sd t0, 48(sp) ; RV64I-NEXT: sd zero, 56(sp) ; RV64I-NEXT: sd zero, 64(sp) ; RV64I-NEXT: sd zero, 72(sp) @@ -329,13 +329,13 @@ define i64 @callee_aligned_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i128 %f ; RV64I-LABEL: callee_aligned_stack: ; RV64I: # %bb.0: ; RV64I-NEXT: ld a0, 32(sp) -; RV64I-NEXT: ld a1, 0(sp) -; RV64I-NEXT: ld a2, 16(sp) +; RV64I-NEXT: ld a1, 16(sp) +; RV64I-NEXT: ld a2, 0(sp) ; RV64I-NEXT: ld a3, 40(sp) ; RV64I-NEXT: add a5, a5, a7 -; RV64I-NEXT: add a1, a5, a1 -; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: add a2, a5, a2 ; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: add a0, a2, a0 ; RV64I-NEXT: add a0, a0, a3 ; RV64I-NEXT: ret %f_trunc = trunc i128 %f 
to i64 @@ -356,24 +356,24 @@ define void @caller_aligned_stack() nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -64 ; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: li a6, 12 -; RV64I-NEXT: li a7, 11 -; RV64I-NEXT: li t0, 10 -; RV64I-NEXT: li t1, 9 -; RV64I-NEXT: li t2, 8 +; RV64I-NEXT: li a0, 12 +; RV64I-NEXT: li a1, 11 +; RV64I-NEXT: sd a1, 40(sp) +; RV64I-NEXT: sd a0, 48(sp) +; RV64I-NEXT: li a6, 10 +; RV64I-NEXT: li t0, 9 +; RV64I-NEXT: li t1, 8 ; RV64I-NEXT: li a0, 1 ; RV64I-NEXT: li a1, 2 ; RV64I-NEXT: li a2, 3 ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 5 ; RV64I-NEXT: li a5, 6 -; RV64I-NEXT: sd a7, 40(sp) -; RV64I-NEXT: sd a6, 48(sp) ; RV64I-NEXT: li a7, 7 -; RV64I-NEXT: sd t2, 0(sp) -; RV64I-NEXT: sd t1, 16(sp) +; RV64I-NEXT: sd t1, 0(sp) +; RV64I-NEXT: sd t0, 16(sp) ; RV64I-NEXT: sd zero, 24(sp) -; RV64I-NEXT: sd t0, 32(sp) +; RV64I-NEXT: sd a6, 32(sp) ; RV64I-NEXT: li a6, 0 ; RV64I-NEXT: call callee_aligned_stack ; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload @@ -448,13 +448,13 @@ define i64 @caller_small_struct_ret() nounwind { define i256 @callee_large_scalar_ret() nounwind { ; RV64I-LABEL: callee_large_scalar_ret: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, -1 -; RV64I-NEXT: lui a2, 1018435 -; RV64I-NEXT: addi a2, a2, 747 -; RV64I-NEXT: sd a2, 0(a0) -; RV64I-NEXT: sd a1, 8(a0) -; RV64I-NEXT: sd a1, 16(a0) -; RV64I-NEXT: sd a1, 24(a0) +; RV64I-NEXT: lui a1, 1018435 +; RV64I-NEXT: li a2, -1 +; RV64I-NEXT: addi a1, a1, 747 +; RV64I-NEXT: sd a1, 0(a0) +; RV64I-NEXT: sd a2, 8(a0) +; RV64I-NEXT: sd a2, 16(a0) +; RV64I-NEXT: sd a2, 24(a0) ; RV64I-NEXT: ret ret i256 -123456789 } @@ -480,15 +480,15 @@ define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result ; RV64I: # %bb.0: ; RV64I-NEXT: li a1, 1 ; RV64I-NEXT: li a2, 2 -; RV64I-NEXT: li a3, 3 -; RV64I-NEXT: li a4, 4 ; RV64I-NEXT: sw a1, 0(a0) ; RV64I-NEXT: sw zero, 4(a0) ; RV64I-NEXT: sw a2, 8(a0) ; RV64I-NEXT: sw zero, 12(a0) -; RV64I-NEXT: sw a3, 16(a0) +; 
RV64I-NEXT: li a1, 3 +; RV64I-NEXT: li a2, 4 +; RV64I-NEXT: sw a1, 16(a0) ; RV64I-NEXT: sw zero, 20(a0) -; RV64I-NEXT: sw a4, 24(a0) +; RV64I-NEXT: sw a2, 24(a0) ; RV64I-NEXT: sw zero, 28(a0) ; RV64I-NEXT: ret store i64 1, ptr %agg.result, align 4 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-vector-float.ll b/llvm/test/CodeGen/RISCV/calling-conv-vector-float.ll index 4153cad1ae881..fd8195fa2466a 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-vector-float.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-vector-float.ll @@ -7,14 +7,14 @@ define <2 x float> @callee_v2f32(<2 x float> %x, <2 x float> %y) { ; RV64-LABEL: callee_v2f32: ; RV64: # %bb.0: -; RV64-NEXT: fmv.w.x fa5, a2 -; RV64-NEXT: fmv.w.x fa4, a0 -; RV64-NEXT: fmv.w.x fa3, a3 -; RV64-NEXT: fmv.w.x fa2, a1 -; RV64-NEXT: fadd.s fa3, fa2, fa3 +; RV64-NEXT: fmv.w.x fa5, a3 +; RV64-NEXT: fmv.w.x fa4, a1 +; RV64-NEXT: fmv.w.x fa3, a2 +; RV64-NEXT: fmv.w.x fa2, a0 ; RV64-NEXT: fadd.s fa5, fa4, fa5 -; RV64-NEXT: fmv.x.w a0, fa5 -; RV64-NEXT: fmv.x.w a1, fa3 +; RV64-NEXT: fadd.s fa4, fa2, fa3 +; RV64-NEXT: fmv.x.w a0, fa4 +; RV64-NEXT: fmv.x.w a1, fa5 ; RV64-NEXT: ret ; ; RV64LP64F-LABEL: callee_v2f32: @@ -29,22 +29,22 @@ define <2 x float> @callee_v2f32(<2 x float> %x, <2 x float> %y) { define <4 x float> @callee_v4f32(<4 x float> %x, <4 x float> %y) { ; RV64-LABEL: callee_v4f32: ; RV64: # %bb.0: -; RV64-NEXT: fmv.w.x fa5, a4 -; RV64-NEXT: fmv.w.x fa4, a7 -; RV64-NEXT: fmv.w.x fa3, a3 -; RV64-NEXT: fmv.w.x fa2, a6 -; RV64-NEXT: fmv.w.x fa1, a2 -; RV64-NEXT: fmv.w.x fa0, a5 -; RV64-NEXT: fmv.w.x ft0, a1 +; RV64-NEXT: fmv.w.x fa5, a5 +; RV64-NEXT: fmv.w.x fa4, a1 +; RV64-NEXT: fmv.w.x fa3, a6 +; RV64-NEXT: fmv.w.x fa2, a2 +; RV64-NEXT: fmv.w.x fa1, a7 +; RV64-NEXT: fmv.w.x fa0, a3 +; RV64-NEXT: fmv.w.x ft0, a4 ; RV64-NEXT: flw ft1, 0(sp) -; RV64-NEXT: fadd.s fa0, ft0, fa0 -; RV64-NEXT: fadd.s fa2, fa1, fa2 -; RV64-NEXT: fadd.s fa4, fa3, fa4 -; RV64-NEXT: fadd.s fa5, fa5, ft1 -; RV64-NEXT: fsw fa0, 0(a0) -; 
RV64-NEXT: fsw fa2, 4(a0) -; RV64-NEXT: fsw fa4, 8(a0) -; RV64-NEXT: fsw fa5, 12(a0) +; RV64-NEXT: fadd.s fa5, fa4, fa5 +; RV64-NEXT: fadd.s fa4, fa2, fa3 +; RV64-NEXT: fadd.s fa3, fa0, fa1 +; RV64-NEXT: fadd.s fa2, ft0, ft1 +; RV64-NEXT: fsw fa5, 0(a0) +; RV64-NEXT: fsw fa4, 4(a0) +; RV64-NEXT: fsw fa3, 8(a0) +; RV64-NEXT: fsw fa2, 12(a0) ; RV64-NEXT: ret ; ; RV64LP64F-LABEL: callee_v4f32: diff --git a/llvm/test/CodeGen/RISCV/calls.ll b/llvm/test/CodeGen/RISCV/calls.ll index f30c453d7f6bc..c23afce6a3bb8 100644 --- a/llvm/test/CodeGen/RISCV/calls.ll +++ b/llvm/test/CodeGen/RISCV/calls.ll @@ -974,15 +974,15 @@ define fastcc void @fastcc_call_nonfastcc(){ ; RV64I-LARGE-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-LARGE-NEXT: .cfi_offset ra, -8 ; RV64I-LARGE-NEXT: li t0, 10 -; RV64I-LARGE-NEXT: li t1, 9 ; RV64I-LARGE-NEXT: .Lpcrel_hi6: -; RV64I-LARGE-NEXT: auipc a5, %pcrel_hi(.LCPI11_0) +; RV64I-LARGE-NEXT: auipc a0, %pcrel_hi(.LCPI11_0) +; RV64I-LARGE-NEXT: li t1, 9 +; RV64I-LARGE-NEXT: ld t2, %pcrel_lo(.Lpcrel_hi6)(a0) ; RV64I-LARGE-NEXT: li a0, 1 ; RV64I-LARGE-NEXT: li a1, 2 ; RV64I-LARGE-NEXT: li a2, 3 ; RV64I-LARGE-NEXT: li a3, 4 ; RV64I-LARGE-NEXT: li a4, 5 -; RV64I-LARGE-NEXT: ld t2, %pcrel_lo(.Lpcrel_hi6)(a5) ; RV64I-LARGE-NEXT: li a5, 6 ; RV64I-LARGE-NEXT: li a6, 7 ; RV64I-LARGE-NEXT: li a7, 8 @@ -1003,15 +1003,15 @@ define fastcc void @fastcc_call_nonfastcc(){ ; RV64I-LARGE-ZICFILP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-LARGE-ZICFILP-NEXT: .cfi_offset ra, -8 ; RV64I-LARGE-ZICFILP-NEXT: li t0, 10 -; RV64I-LARGE-ZICFILP-NEXT: li t1, 9 ; RV64I-LARGE-ZICFILP-NEXT: .Lpcrel_hi6: -; RV64I-LARGE-ZICFILP-NEXT: auipc a5, %pcrel_hi(.LCPI11_0) +; RV64I-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI11_0) +; RV64I-LARGE-ZICFILP-NEXT: li t1, 9 +; RV64I-LARGE-ZICFILP-NEXT: ld t2, %pcrel_lo(.Lpcrel_hi6)(a0) ; RV64I-LARGE-ZICFILP-NEXT: li a0, 1 ; RV64I-LARGE-ZICFILP-NEXT: li a1, 2 ; RV64I-LARGE-ZICFILP-NEXT: li a2, 3 ; RV64I-LARGE-ZICFILP-NEXT: li a3, 4 ; 
RV64I-LARGE-ZICFILP-NEXT: li a4, 5 -; RV64I-LARGE-ZICFILP-NEXT: ld t2, %pcrel_lo(.Lpcrel_hi6)(a5) ; RV64I-LARGE-ZICFILP-NEXT: li a5, 6 ; RV64I-LARGE-ZICFILP-NEXT: li a6, 7 ; RV64I-LARGE-ZICFILP-NEXT: li a7, 8 diff --git a/llvm/test/CodeGen/RISCV/cmov-branch-opt.ll b/llvm/test/CodeGen/RISCV/cmov-branch-opt.ll index 1957019f055a2..5ffd60a7fa607 100644 --- a/llvm/test/CodeGen/RISCV/cmov-branch-opt.ll +++ b/llvm/test/CodeGen/RISCV/cmov-branch-opt.ll @@ -170,8 +170,8 @@ define signext i32 @test4(i32 signext %x, i32 signext %y, i32 signext %z) { ; ; CMOV-NOZICOND-LABEL: test4: ; CMOV-NOZICOND: # %bb.0: -; CMOV-NOZICOND-NEXT: li a1, 0 ; CMOV-NOZICOND-NEXT: li a0, 3 +; CMOV-NOZICOND-NEXT: li a1, 0 ; CMOV-NOZICOND-NEXT: beqz a2, .LBB3_2 ; CMOV-NOZICOND-NEXT: # %bb.1: ; CMOV-NOZICOND-NEXT: mv a0, a1 diff --git a/llvm/test/CodeGen/RISCV/combine-storetomstore.ll b/llvm/test/CodeGen/RISCV/combine-storetomstore.ll index c7d1f76e73cf2..9a9a5353d0afe 100644 --- a/llvm/test/CodeGen/RISCV/combine-storetomstore.ll +++ b/llvm/test/CodeGen/RISCV/combine-storetomstore.ll @@ -60,9 +60,9 @@ define void @test_masked_store_success_v4f16(<4 x half> %x, ptr %ptr, <4 x i1> % ; RISCV-NEXT: # %bb.1: ; RISCV-NEXT: mv a2, a1 ; RISCV-NEXT: .LBB4_2: +; RISCV-NEXT: vmv1r.v v0, v9 ; RISCV-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; RISCV-NEXT: vmv.v.i v8, 0 -; RISCV-NEXT: vmv1r.v v0, v9 ; RISCV-NEXT: vmerge.vim v8, v8, 1, v0 ; RISCV-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RISCV-NEXT: vslidedown.vi v8, v8, 2 @@ -199,9 +199,9 @@ define void @test_masked_store_success_v8f16(<8 x half> %x, ptr %ptr, <8 x i1> % ; RISCV-NEXT: # %bb.1: ; RISCV-NEXT: mv a2, a1 ; RISCV-NEXT: .LBB11_2: +; RISCV-NEXT: vmv1r.v v0, v8 ; RISCV-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RISCV-NEXT: vmv.v.i v9, 0 -; RISCV-NEXT: vmv1r.v v0, v8 ; RISCV-NEXT: vmerge.vim v9, v9, 1, v0 ; RISCV-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RISCV-NEXT: vslidedown.vi v9, v9, 4 @@ -269,7 +269,7 @@ define void @test_masked_store_success_v8f16(<8 x 
half> %x, ptr %ptr, <8 x i1> % ; RISCV-NEXT: vfirst.m t0, v10 ; RISCV-NEXT: bnez t0, .LBB11_16 ; RISCV-NEXT: .LBB11_13: -; RISCV-NEXT: addi t1, a0, 16 +; RISCV-NEXT: addi t0, a0, 16 ; RISCV-NEXT: j .LBB11_17 ; RISCV-NEXT: .LBB11_14: ; RISCV-NEXT: addi a6, a0, 40 @@ -280,21 +280,21 @@ define void @test_masked_store_success_v8f16(<8 x half> %x, ptr %ptr, <8 x i1> % ; RISCV-NEXT: vfirst.m t0, v10 ; RISCV-NEXT: beqz t0, .LBB11_13 ; RISCV-NEXT: .LBB11_16: -; RISCV-NEXT: addi t1, a1, 4 +; RISCV-NEXT: addi t0, a1, 4 ; RISCV-NEXT: .LBB11_17: ; RISCV-NEXT: vmv1r.v v0, v8 -; RISCV-NEXT: lh t0, 0(a2) +; RISCV-NEXT: vmerge.vim v8, v12, 1, v0 +; RISCV-NEXT: lh t1, 0(a2) ; RISCV-NEXT: lh a2, 0(a3) ; RISCV-NEXT: lh a3, 0(a4) ; RISCV-NEXT: lh a4, 0(a5) ; RISCV-NEXT: lh a5, 0(a6) ; RISCV-NEXT: lh a6, 0(a7) -; RISCV-NEXT: lh a7, 0(t1) -; RISCV-NEXT: vmerge.vim v8, v12, 1, v0 +; RISCV-NEXT: lh a7, 0(t0) ; RISCV-NEXT: vslidedown.vi v8, v8, 1 -; RISCV-NEXT: vmv.x.s t1, v8 -; RISCV-NEXT: andi t1, t1, 1 -; RISCV-NEXT: bnez t1, .LBB11_19 +; RISCV-NEXT: vmv.x.s t0, v8 +; RISCV-NEXT: andi t0, t0, 1 +; RISCV-NEXT: bnez t0, .LBB11_19 ; RISCV-NEXT: # %bb.18: ; RISCV-NEXT: addi a0, a1, 2 ; RISCV-NEXT: j .LBB11_20 @@ -302,7 +302,7 @@ define void @test_masked_store_success_v8f16(<8 x half> %x, ptr %ptr, <8 x i1> % ; RISCV-NEXT: addi a0, a0, 8 ; RISCV-NEXT: .LBB11_20: ; RISCV-NEXT: lh a0, 0(a0) -; RISCV-NEXT: sh t0, 0(a1) +; RISCV-NEXT: sh t1, 0(a1) ; RISCV-NEXT: sh a0, 2(a1) ; RISCV-NEXT: sh a7, 4(a1) ; RISCV-NEXT: sh a3, 6(a1) @@ -523,10 +523,10 @@ define void @test_masked_store_intervening(<8 x i32> %x, ptr %ptr, <8 x i1> %mas ; RISCV-NEXT: addi a1, a1, 16 ; RISCV-NEXT: vs2r.v v8, (a1) # vscale x 16-byte Folded Spill ; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RISCV-NEXT: vle32.v v8, (a0) -; RISCV-NEXT: addi a1, sp, 16 -; RISCV-NEXT: vs2r.v v8, (a1) # vscale x 16-byte Folded Spill ; RISCV-NEXT: vmv.v.i v8, 0 +; RISCV-NEXT: vle32.v v10, (a0) +; RISCV-NEXT: addi a1, sp, 16 +; RISCV-NEXT: 
vs2r.v v10, (a1) # vscale x 16-byte Folded Spill ; RISCV-NEXT: vse32.v v8, (a0) ; RISCV-NEXT: call use_vec ; RISCV-NEXT: csrr a0, vlenb diff --git a/llvm/test/CodeGen/RISCV/compress.ll b/llvm/test/CodeGen/RISCV/compress.ll index c8803773d7630..d3f6ce7b05fd3 100644 --- a/llvm/test/CodeGen/RISCV/compress.ll +++ b/llvm/test/CodeGen/RISCV/compress.ll @@ -32,8 +32,8 @@ define i32 @simple_arith(i32 %a, i32 %b) #0 { ; RV32IC-LABEL: : ; RV32IC: addi a2, a0, 0x1 -; RV32IC-NEXT: c.srai a1, 0x9 ; RV32IC-NEXT: c.andi a2, 0xb +; RV32IC-NEXT: c.srai a1, 0x9 ; RV32IC-NEXT: c.slli a2, 0x7 ; RV32IC-NEXT: sub a0, a1, a0 ; RV32IC-NEXT: c.add a0, a2 @@ -49,37 +49,37 @@ define i32 @simple_arith(i32 %a, i32 %b) #0 { define i32 @select(i32 %a, ptr %b) #0 { ; RV32IC-LABEL: