diff --git a/llvm/test/CodeGen/RISCV/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
index ab472e9f44316b..553d0b81b63dfc 100644
--- a/llvm/test/CodeGen/RISCV/rotl-rotr.ll
+++ b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
@@ -1,12 +1,21 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv32 -mattr=+zbb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32ZBB
+; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64ZBB
 
-; These IR sequences will generate ISD::ROTL and ISD::ROTR nodes, that the
-; RISC-V backend must be able to select
+; These IR sequences are idioms for rotates. If rotate instructions are
+; supported, they will be turned into ISD::ROTL or ISD::ROTR.
 
-define i32 @rotl(i32 %x, i32 %y) nounwind {
-; RV32I-LABEL: rotl:
+; FIXME: We don't match 32-bit rotates with Zbb even though we have ROLW/RORW
+; instructions.
+
+define i32 @rotl_32(i32 %x, i32 %y) nounwind {
+; RV32I-LABEL: rotl_32:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    li a2, 32
 ; RV32I-NEXT:    sub a2, a2, a1
@@ -14,6 +23,29 @@ define i32 @rotl(i32 %x, i32 %y) nounwind {
 ; RV32I-NEXT:    srl a0, a0, a2
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: rotl_32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 32
+; RV64I-NEXT:    subw a2, a2, a1
+; RV64I-NEXT:    sllw a1, a0, a1
+; RV64I-NEXT:    srlw a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: rotl_32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    rol a0, a0, a1
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: rotl_32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    li a2, 32
+; RV64ZBB-NEXT:    subw a2, a2, a1
+; RV64ZBB-NEXT:    sllw a1, a0, a1
+; RV64ZBB-NEXT:    srlw a0, a0, a2
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    ret
   %z = sub i32 32, %y
   %b = shl i32 %x, %y
   %c = lshr i32 %x, %z
@@ -21,8 +53,8 @@ define i32 @rotl(i32 %x, i32 %y) nounwind {
   ret i32 %d
 }
 
-define i32 @rotr(i32 %x, i32 %y) nounwind {
-; RV32I-LABEL: rotr:
+define i32 @rotr_32(i32 %x, i32 %y) nounwind {
+; RV32I-LABEL: rotr_32:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    li a2, 32
 ; RV32I-NEXT:    sub a2, a2, a1
@@ -30,9 +62,514 @@ define i32 @rotr(i32 %x, i32 %y) nounwind {
 ; RV32I-NEXT:    sll a0, a0, a2
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: rotr_32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 32
+; RV64I-NEXT:    subw a2, a2, a1
+; RV64I-NEXT:    srlw a1, a0, a1
+; RV64I-NEXT:    sllw a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: rotr_32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    ror a0, a0, a1
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: rotr_32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    li a2, 32
+; RV64ZBB-NEXT:    subw a2, a2, a1
+; RV64ZBB-NEXT:    srlw a1, a0, a1
+; RV64ZBB-NEXT:    sllw a0, a0, a2
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    ret
   %z = sub i32 32, %y
   %b = lshr i32 %x, %y
   %c = shl i32 %x, %z
   %d = or i32 %b, %c
   ret i32 %d
 }
+
+define i64 @rotl_64(i64 %x, i64 %y) nounwind {
+; RV32I-LABEL: rotl_64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    addi a5, a2, -32
+; RV32I-NEXT:    li a4, 31
+; RV32I-NEXT:    bltz a5, .LBB2_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    sll a1, a0, a5
+; RV32I-NEXT:    j .LBB2_3
+; RV32I-NEXT:  .LBB2_2:
+; RV32I-NEXT:    sll a1, a3, a2
+; RV32I-NEXT:    sub a6, a4, a2
+; RV32I-NEXT:    srli a7, a0, 1
+; RV32I-NEXT:    srl a6, a7, a6
+; RV32I-NEXT:    or a1, a1, a6
+; RV32I-NEXT:  .LBB2_3:
+; RV32I-NEXT:    li a6, 32
+; RV32I-NEXT:    sub a6, a6, a2
+; RV32I-NEXT:    bltz a6, .LBB2_5
+; RV32I-NEXT:  # %bb.4:
+; RV32I-NEXT:    srl a4, a3, a6
+; RV32I-NEXT:    bltz a5, .LBB2_6
+; RV32I-NEXT:    j .LBB2_7
+; RV32I-NEXT:  .LBB2_5:
+; RV32I-NEXT:    li a6, 64
+; RV32I-NEXT:    sub a6, a6, a2
+; RV32I-NEXT:    srl a7, a0, a6
+; RV32I-NEXT:    sub a4, a4, a6
+; RV32I-NEXT:    slli t0, a3, 1
+; RV32I-NEXT:    sll a4, t0, a4
+; RV32I-NEXT:    or a4, a7, a4
+; RV32I-NEXT:    srl a3, a3, a6
+; RV32I-NEXT:    or a1, a1, a3
+; RV32I-NEXT:    bgez a5, .LBB2_7
+; RV32I-NEXT:  .LBB2_6:
+; RV32I-NEXT:    sll a0, a0, a2
+; RV32I-NEXT:    or a4, a4, a0
+; RV32I-NEXT:  .LBB2_7:
+; RV32I-NEXT:    mv a0, a4
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: rotl_64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 64
+; RV64I-NEXT:    sub a2, a2, a1
+; RV64I-NEXT:    sll a1, a0, a1
+; RV64I-NEXT:    srl a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: rotl_64:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    mv a3, a1
+; RV32ZBB-NEXT:    addi a5, a2, -32
+; RV32ZBB-NEXT:    li a4, 31
+; RV32ZBB-NEXT:    bltz a5, .LBB2_2
+; RV32ZBB-NEXT:  # %bb.1:
+; RV32ZBB-NEXT:    sll a1, a0, a5
+; RV32ZBB-NEXT:    j .LBB2_3
+; RV32ZBB-NEXT:  .LBB2_2:
+; RV32ZBB-NEXT:    sll a1, a3, a2
+; RV32ZBB-NEXT:    sub a6, a4, a2
+; RV32ZBB-NEXT:    srli a7, a0, 1
+; RV32ZBB-NEXT:    srl a6, a7, a6
+; RV32ZBB-NEXT:    or a1, a1, a6
+; RV32ZBB-NEXT:  .LBB2_3:
+; RV32ZBB-NEXT:    li a6, 32
+; RV32ZBB-NEXT:    sub a6, a6, a2
+; RV32ZBB-NEXT:    bltz a6, .LBB2_5
+; RV32ZBB-NEXT:  # %bb.4:
+; RV32ZBB-NEXT:    srl a4, a3, a6
+; RV32ZBB-NEXT:    bltz a5, .LBB2_6
+; RV32ZBB-NEXT:    j .LBB2_7
+; RV32ZBB-NEXT:  .LBB2_5:
+; RV32ZBB-NEXT:    li a6, 64
+; RV32ZBB-NEXT:    sub a6, a6, a2
+; RV32ZBB-NEXT:    srl a7, a0, a6
+; RV32ZBB-NEXT:    sub a4, a4, a6
+; RV32ZBB-NEXT:    slli t0, a3, 1
+; RV32ZBB-NEXT:    sll a4, t0, a4
+; RV32ZBB-NEXT:    or a4, a7, a4
+; RV32ZBB-NEXT:    srl a3, a3, a6
+; RV32ZBB-NEXT:    or a1, a1, a3
+; RV32ZBB-NEXT:    bgez a5, .LBB2_7
+; RV32ZBB-NEXT:  .LBB2_6:
+; RV32ZBB-NEXT:    sll a0, a0, a2
+; RV32ZBB-NEXT:    or a4, a4, a0
+; RV32ZBB-NEXT:  .LBB2_7:
+; RV32ZBB-NEXT:    mv a0, a4
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: rotl_64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    rol a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %z = sub i64 64, %y
+  %b = shl i64 %x, %y
+  %c = lshr i64 %x, %z
+  %d = or i64 %b, %c
+  ret i64 %d
+}
+
+define i64 @rotr_64(i64 %x, i64 %y) nounwind {
+; RV32I-LABEL: rotr_64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    mv a3, a0
+; RV32I-NEXT:    addi a5, a2, -32
+; RV32I-NEXT:    li a4, 31
+; RV32I-NEXT:    bltz a5, .LBB3_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    srl a0, a1, a5
+; RV32I-NEXT:    j .LBB3_3
+; RV32I-NEXT:  .LBB3_2:
+; RV32I-NEXT:    srl a0, a3, a2
+; RV32I-NEXT:    sub a6, a4, a2
+; RV32I-NEXT:    slli a7, a1, 1
+; RV32I-NEXT:    sll a6, a7, a6
+; RV32I-NEXT:    or a0, a0, a6
+; RV32I-NEXT:  .LBB3_3:
+; RV32I-NEXT:    li a6, 32
+; RV32I-NEXT:    sub a6, a6, a2
+; RV32I-NEXT:    bltz a6, .LBB3_5
+; RV32I-NEXT:  # %bb.4:
+; RV32I-NEXT:    sll a4, a3, a6
+; RV32I-NEXT:    bltz a5, .LBB3_6
+; RV32I-NEXT:    j .LBB3_7
+; RV32I-NEXT:  .LBB3_5:
+; RV32I-NEXT:    li a6, 64
+; RV32I-NEXT:    sub a6, a6, a2
+; RV32I-NEXT:    sll a7, a1, a6
+; RV32I-NEXT:    sub a4, a4, a6
+; RV32I-NEXT:    srli t0, a3, 1
+; RV32I-NEXT:    srl a4, t0, a4
+; RV32I-NEXT:    or a4, a7, a4
+; RV32I-NEXT:    sll a3, a3, a6
+; RV32I-NEXT:    or a0, a0, a3
+; RV32I-NEXT:    bgez a5, .LBB3_7
+; RV32I-NEXT:  .LBB3_6:
+; RV32I-NEXT:    srl a1, a1, a2
+; RV32I-NEXT:    or a4, a4, a1
+; RV32I-NEXT:  .LBB3_7:
+; RV32I-NEXT:    mv a1, a4
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: rotr_64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 64
+; RV64I-NEXT:    sub a2, a2, a1
+; RV64I-NEXT:    srl a1, a0, a1
+; RV64I-NEXT:    sll a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: rotr_64:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    mv a3, a0
+; RV32ZBB-NEXT:    addi a5, a2, -32
+; RV32ZBB-NEXT:    li a4, 31
+; RV32ZBB-NEXT:    bltz a5, .LBB3_2
+; RV32ZBB-NEXT:  # %bb.1:
+; RV32ZBB-NEXT:    srl a0, a1, a5
+; RV32ZBB-NEXT:    j .LBB3_3
+; RV32ZBB-NEXT:  .LBB3_2:
+; RV32ZBB-NEXT:    srl a0, a3, a2
+; RV32ZBB-NEXT:    sub a6, a4, a2
+; RV32ZBB-NEXT:    slli a7, a1, 1
+; RV32ZBB-NEXT:    sll a6, a7, a6
+; RV32ZBB-NEXT:    or a0, a0, a6
+; RV32ZBB-NEXT:  .LBB3_3:
+; RV32ZBB-NEXT:    li a6, 32
+; RV32ZBB-NEXT:    sub a6, a6, a2
+; RV32ZBB-NEXT:    bltz a6, .LBB3_5
+; RV32ZBB-NEXT:  # %bb.4:
+; RV32ZBB-NEXT:    sll a4, a3, a6
+; RV32ZBB-NEXT:    bltz a5, .LBB3_6
+; RV32ZBB-NEXT:    j .LBB3_7
+; RV32ZBB-NEXT:  .LBB3_5:
+; RV32ZBB-NEXT:    li a6, 64
+; RV32ZBB-NEXT:    sub a6, a6, a2
+; RV32ZBB-NEXT:    sll a7, a1, a6
+; RV32ZBB-NEXT:    sub a4, a4, a6
+; RV32ZBB-NEXT:    srli t0, a3, 1
+; RV32ZBB-NEXT:    srl a4, t0, a4
+; RV32ZBB-NEXT:    or a4, a7, a4
+; RV32ZBB-NEXT:    sll a3, a3, a6
+; RV32ZBB-NEXT:    or a0, a0, a3
+; RV32ZBB-NEXT:    bgez a5, .LBB3_7
+; RV32ZBB-NEXT:  .LBB3_6:
+; RV32ZBB-NEXT:    srl a1, a1, a2
+; RV32ZBB-NEXT:    or a4, a4, a1
+; RV32ZBB-NEXT:  .LBB3_7:
+; RV32ZBB-NEXT:    mv a1, a4
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: rotr_64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ror a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %z = sub i64 64, %y
+  %b = lshr i64 %x, %y
+  %c = shl i64 %x, %z
+  %d = or i64 %b, %c
+  ret i64 %d
+}
+
+define i32 @rotl_32_mask(i32 %x, i32 %y) nounwind {
+; RV32I-LABEL: rotl_32_mask:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    neg a2, a1
+; RV32I-NEXT:    sll a1, a0, a1
+; RV32I-NEXT:    srl a0, a0, a2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: rotl_32_mask:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    negw a2, a1
+; RV64I-NEXT:    sllw a1, a0, a1
+; RV64I-NEXT:    srlw a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: rotl_32_mask:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    rol a0, a0, a1
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: rotl_32_mask:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    negw a2, a1
+; RV64ZBB-NEXT:    sllw a1, a0, a1
+; RV64ZBB-NEXT:    srlw a0, a0, a2
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    ret
+  %z = sub i32 0, %y
+  %and = and i32 %z, 31
+  %b = shl i32 %x, %y
+  %c = lshr i32 %x, %and
+  %d = or i32 %b, %c
+  ret i32 %d
+}
+
+define i32 @rotr_32_mask(i32 %x, i32 %y) nounwind {
+; RV32I-LABEL: rotr_32_mask:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    neg a2, a1
+; RV32I-NEXT:    srl a1, a0, a1
+; RV32I-NEXT:    sll a0, a0, a2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: rotr_32_mask:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    negw a2, a1
+; RV64I-NEXT:    srlw a1, a0, a1
+; RV64I-NEXT:    sllw a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: rotr_32_mask:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    ror a0, a0, a1
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: rotr_32_mask:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    negw a2, a1
+; RV64ZBB-NEXT:    srlw a1, a0, a1
+; RV64ZBB-NEXT:    sllw a0, a0, a2
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    ret
+  %z = sub i32 0, %y
+  %and = and i32 %z, 31
+  %b = lshr i32 %x, %y
+  %c = shl i32 %x, %and
+  %d = or i32 %b, %c
+  ret i32 %d
+}
+
+define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind {
+; RV32I-LABEL: rotl_64_mask:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    addi a5, a2, -32
+; RV32I-NEXT:    li a4, 31
+; RV32I-NEXT:    bltz a5, .LBB6_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    sll a1, a0, a5
+; RV32I-NEXT:    j .LBB6_3
+; RV32I-NEXT:  .LBB6_2:
+; RV32I-NEXT:    sll a1, a3, a2
+; RV32I-NEXT:    sub a6, a4, a2
+; RV32I-NEXT:    srli a7, a0, 1
+; RV32I-NEXT:    srl a6, a7, a6
+; RV32I-NEXT:    or a1, a1, a6
+; RV32I-NEXT:  .LBB6_3:
+; RV32I-NEXT:    neg a6, a2
+; RV32I-NEXT:    andi a7, a6, 63
+; RV32I-NEXT:    addi t0, a7, -32
+; RV32I-NEXT:    bltz t0, .LBB6_5
+; RV32I-NEXT:  # %bb.4:
+; RV32I-NEXT:    srl a4, a3, t0
+; RV32I-NEXT:    bltz a5, .LBB6_6
+; RV32I-NEXT:    j .LBB6_7
+; RV32I-NEXT:  .LBB6_5:
+; RV32I-NEXT:    srl t0, a0, a6
+; RV32I-NEXT:    sub a4, a4, a7
+; RV32I-NEXT:    slli a7, a3, 1
+; RV32I-NEXT:    sll a4, a7, a4
+; RV32I-NEXT:    or a4, t0, a4
+; RV32I-NEXT:    srl a3, a3, a6
+; RV32I-NEXT:    or a1, a1, a3
+; RV32I-NEXT:    bgez a5, .LBB6_7
+; RV32I-NEXT:  .LBB6_6:
+; RV32I-NEXT:    sll a0, a0, a2
+; RV32I-NEXT:    or a4, a4, a0
+; RV32I-NEXT:  .LBB6_7:
+; RV32I-NEXT:    mv a0, a4
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: rotl_64_mask:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    neg a2, a1
+; RV64I-NEXT:    sll a1, a0, a1
+; RV64I-NEXT:    srl a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: rotl_64_mask:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    mv a3, a1
+; RV32ZBB-NEXT:    addi a5, a2, -32
+; RV32ZBB-NEXT:    li a4, 31
+; RV32ZBB-NEXT:    bltz a5, .LBB6_2
+; RV32ZBB-NEXT:  # %bb.1:
+; RV32ZBB-NEXT:    sll a1, a0, a5
+; RV32ZBB-NEXT:    j .LBB6_3
+; RV32ZBB-NEXT:  .LBB6_2:
+; RV32ZBB-NEXT:    sll a1, a3, a2
+; RV32ZBB-NEXT:    sub a6, a4, a2
+; RV32ZBB-NEXT:    srli a7, a0, 1
+; RV32ZBB-NEXT:    srl a6, a7, a6
+; RV32ZBB-NEXT:    or a1, a1, a6
+; RV32ZBB-NEXT:  .LBB6_3:
+; RV32ZBB-NEXT:    neg a6, a2
+; RV32ZBB-NEXT:    andi a7, a6, 63
+; RV32ZBB-NEXT:    addi t0, a7, -32
+; RV32ZBB-NEXT:    bltz t0, .LBB6_5
+; RV32ZBB-NEXT:  # %bb.4:
+; RV32ZBB-NEXT:    srl a4, a3, t0
+; RV32ZBB-NEXT:    bltz a5, .LBB6_6
+; RV32ZBB-NEXT:    j .LBB6_7
+; RV32ZBB-NEXT:  .LBB6_5:
+; RV32ZBB-NEXT:    srl t0, a0, a6
+; RV32ZBB-NEXT:    sub a4, a4, a7
+; RV32ZBB-NEXT:    slli a7, a3, 1
+; RV32ZBB-NEXT:    sll a4, a7, a4
+; RV32ZBB-NEXT:    or a4, t0, a4
+; RV32ZBB-NEXT:    srl a3, a3, a6
+; RV32ZBB-NEXT:    or a1, a1, a3
+; RV32ZBB-NEXT:    bgez a5, .LBB6_7
+; RV32ZBB-NEXT:  .LBB6_6:
+; RV32ZBB-NEXT:    sll a0, a0, a2
+; RV32ZBB-NEXT:    or a4, a4, a0
+; RV32ZBB-NEXT:  .LBB6_7:
+; RV32ZBB-NEXT:    mv a0, a4
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: rotl_64_mask:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    rol a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %z = sub i64 0, %y
+  %and = and i64 %z, 63
+  %b = shl i64 %x, %y
+  %c = lshr i64 %x, %and
+  %d = or i64 %b, %c
+  ret i64 %d
+}
+
+define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind {
+; RV32I-LABEL: rotr_64_mask:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    mv a3, a0
+; RV32I-NEXT:    addi a5, a2, -32
+; RV32I-NEXT:    li a4, 31
+; RV32I-NEXT:    bltz a5, .LBB7_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    srl a0, a1, a5
+; RV32I-NEXT:    j .LBB7_3
+; RV32I-NEXT:  .LBB7_2:
+; RV32I-NEXT:    srl a0, a3, a2
+; RV32I-NEXT:    sub a6, a4, a2
+; RV32I-NEXT:    slli a7, a1, 1
+; RV32I-NEXT:    sll a6, a7, a6
+; RV32I-NEXT:    or a0, a0, a6
+; RV32I-NEXT:  .LBB7_3:
+; RV32I-NEXT:    neg a6, a2
+; RV32I-NEXT:    andi a7, a6, 63
+; RV32I-NEXT:    addi t0, a7, -32
+; RV32I-NEXT:    bltz t0, .LBB7_5
+; RV32I-NEXT:  # %bb.4:
+; RV32I-NEXT:    sll a4, a3, t0
+; RV32I-NEXT:    bltz a5, .LBB7_6
+; RV32I-NEXT:    j .LBB7_7
+; RV32I-NEXT:  .LBB7_5:
+; RV32I-NEXT:    sll t0, a1, a6
+; RV32I-NEXT:    sub a4, a4, a7
+; RV32I-NEXT:    srli a7, a3, 1
+; RV32I-NEXT:    srl a4, a7, a4
+; RV32I-NEXT:    or a4, t0, a4
+; RV32I-NEXT:    sll a3, a3, a6
+; RV32I-NEXT:    or a0, a0, a3
+; RV32I-NEXT:    bgez a5, .LBB7_7
+; RV32I-NEXT:  .LBB7_6:
+; RV32I-NEXT:    srl a1, a1, a2
+; RV32I-NEXT:    or a4, a4, a1
+; RV32I-NEXT:  .LBB7_7:
+; RV32I-NEXT:    mv a1, a4
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: rotr_64_mask:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    neg a2, a1
+; RV64I-NEXT:    srl a1, a0, a1
+; RV64I-NEXT:    sll a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: rotr_64_mask:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    mv a3, a0
+; RV32ZBB-NEXT:    addi a5, a2, -32
+; RV32ZBB-NEXT:    li a4, 31
+; RV32ZBB-NEXT:    bltz a5, .LBB7_2
+; RV32ZBB-NEXT:  # %bb.1:
+; RV32ZBB-NEXT:    srl a0, a1, a5
+; RV32ZBB-NEXT:    j .LBB7_3
+; RV32ZBB-NEXT:  .LBB7_2:
+; RV32ZBB-NEXT:    srl a0, a3, a2
+; RV32ZBB-NEXT:    sub a6, a4, a2
+; RV32ZBB-NEXT:    slli a7, a1, 1
+; RV32ZBB-NEXT:    sll a6, a7, a6
+; RV32ZBB-NEXT:    or a0, a0, a6
+; RV32ZBB-NEXT:  .LBB7_3:
+; RV32ZBB-NEXT:    neg a6, a2
+; RV32ZBB-NEXT:    andi a7, a6, 63
+; RV32ZBB-NEXT:    addi t0, a7, -32
+; RV32ZBB-NEXT:    bltz t0, .LBB7_5
+; RV32ZBB-NEXT:  # %bb.4:
+; RV32ZBB-NEXT:    sll a4, a3, t0
+; RV32ZBB-NEXT:    bltz a5, .LBB7_6
+; RV32ZBB-NEXT:    j .LBB7_7
+; RV32ZBB-NEXT:  .LBB7_5:
+; RV32ZBB-NEXT:    sll t0, a1, a6
+; RV32ZBB-NEXT:    sub a4, a4, a7
+; RV32ZBB-NEXT:    srli a7, a3, 1
+; RV32ZBB-NEXT:    srl a4, a7, a4
+; RV32ZBB-NEXT:    or a4, t0, a4
+; RV32ZBB-NEXT:    sll a3, a3, a6
+; RV32ZBB-NEXT:    or a0, a0, a3
+; RV32ZBB-NEXT:    bgez a5, .LBB7_7
+; RV32ZBB-NEXT:  .LBB7_6:
+; RV32ZBB-NEXT:    srl a1, a1, a2
+; RV32ZBB-NEXT:    or a4, a4, a1
+; RV32ZBB-NEXT:  .LBB7_7:
+; RV32ZBB-NEXT:    mv a1, a4
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: rotr_64_mask:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ror a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %z = sub i64 0, %y
+  %and = and i64 %z, 63
+  %b = lshr i64 %x, %y
+  %c = shl i64 %x, %and
+  %d = or i64 %b, %c
+  ret i64 %d
+}