diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp index d7ad4c14468ee..503e95fbd23f7 100644 --- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp +++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp @@ -338,7 +338,7 @@ bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi, MachineInstr &Tail = *MRI->use_instr_begin(DestReg); switch (Tail.getOpcode()) { default: - LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:" + LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr: " << Tail); break; case RISCV::ADDI: @@ -387,6 +387,29 @@ bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi, return false; } +static void overwriteMachineOperandInPlace(MachineOperand &MO, + const MachineOperand &ImmOp) { + switch (ImmOp.getType()) { + case MachineOperand::MO_ConstantPoolIndex: + MO.ChangeToCPI(ImmOp.getIndex(), ImmOp.getOffset(), ImmOp.getTargetFlags()); + break; + case MachineOperand::MO_GlobalAddress: + MO.ChangeToGA(ImmOp.getGlobal(), ImmOp.getOffset(), ImmOp.getTargetFlags()); + break; + case MachineOperand::MO_MCSymbol: + MO.ChangeToMCSymbol(ImmOp.getMCSymbol(), ImmOp.getTargetFlags()); + MO.setOffset(ImmOp.getOffset()); + break; + case MachineOperand::MO_BlockAddress: + MO.ChangeToBA(ImmOp.getBlockAddress(), ImmOp.getOffset(), + ImmOp.getTargetFlags()); + break; + default: + report_fatal_error("unsupported machine operand type"); + break; + } +} + bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi, MachineInstr &Lo) { Register DestReg = Lo.getOperand(0).getReg(); @@ -451,6 +474,34 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi, CommonOffset = Offset; break; } + case RISCV::PseudoCCLD: + case RISCV::PseudoCCLW: + case RISCV::PseudoCCLWU: + case RISCV::PseudoCCLH: + case RISCV::PseudoCCLHU: + case RISCV::PseudoCCLB: + case RISCV::PseudoCCLBU: { + // The SFB Pseudos are like their non-SFB counterparts but have more + // 
operands. + if (UseMI.getOperand(2).isFI()) + return false; + // Register defined by Lo should not be the (tied) false value, or a + // register used in the branch predicate. + if (DestReg == UseMI.getOperand(1).getReg() || + DestReg == UseMI.getOperand(5).getReg()) + return false; + if (UseMI.getOperand(6).isReg() && + DestReg == UseMI.getOperand(6).getReg()) + return false; + assert(DestReg == UseMI.getOperand(2).getReg() && + "Expected base address use"); + // All load/store instructions must use the same offset. + int64_t Offset = UseMI.getOperand(3).getImm(); + if (CommonOffset && Offset != CommonOffset) + return false; + CommonOffset = Offset; + break; + } case RISCV::INLINEASM: case RISCV::INLINEASM_BR: { SmallVector<unsigned> InlineAsmMemoryOpIndexes; @@ -538,30 +589,57 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi, auto &InlineAsmMemoryOpIndexes = InlineAsmMemoryOpIndexesMap[&UseMI]; for (unsigned I : InlineAsmMemoryOpIndexes) { MachineOperand &MO = UseMI.getOperand(I + 1); - switch (ImmOp.getType()) { - case MachineOperand::MO_GlobalAddress: - MO.ChangeToGA(ImmOp.getGlobal(), ImmOp.getOffset(), - ImmOp.getTargetFlags()); - break; - case MachineOperand::MO_MCSymbol: - MO.ChangeToMCSymbol(ImmOp.getMCSymbol(), ImmOp.getTargetFlags()); - MO.setOffset(ImmOp.getOffset()); - break; - case MachineOperand::MO_BlockAddress: - MO.ChangeToBA(ImmOp.getBlockAddress(), ImmOp.getOffset(), - ImmOp.getTargetFlags()); - break; - default: - report_fatal_error("unsupported machine operand type"); - break; - } + overwriteMachineOperandInPlace(MO, ImmOp); } } else { + unsigned ImmIdx; + switch (UseMI.getOpcode()) { + case RISCV::INLINEASM: + case RISCV::INLINEASM_BR: + llvm_unreachable("Should have been dealt with before this else"); + case RISCV::PseudoCCLD: + case RISCV::PseudoCCLW: + case RISCV::PseudoCCLWU: + case RISCV::PseudoCCLH: + case RISCV::PseudoCCLHU: + case RISCV::PseudoCCLB: + case RISCV::PseudoCCLBU: + ImmIdx = 3; + break; + case RISCV::LB: + case 
RISCV::LH: + case RISCV::LH_INX: + case RISCV::LW: + case RISCV::LW_INX: + case RISCV::LBU: + case RISCV::LHU: + case RISCV::LWU: + case RISCV::LD: + case RISCV::LD_RV32: + case RISCV::FLH: + case RISCV::FLW: + case RISCV::FLD: + case RISCV::SB: + case RISCV::SH: + case RISCV::SH_INX: + case RISCV::SW: + case RISCV::SW_INX: + case RISCV::SD: + case RISCV::SD_RV32: + case RISCV::FSH: + case RISCV::FSW: + case RISCV::FSD: + ImmIdx = 2; + break; + default: + llvm_unreachable("Unknown Instruction"); + } + + MachineOperand &MO = UseMI.getOperand(ImmIdx); if (Hi.getOpcode() == RISCV::QC_E_LI) { - UseMI.getOperand(2).ChangeToImmediate(0); + MO.ChangeToImmediate(0); } else { - UseMI.removeOperand(2); - UseMI.addOperand(ImmOp); + overwriteMachineOperandInPlace(MO, ImmOp); } } } diff --git a/llvm/test/CodeGen/RISCV/fold-addi-loadstore-cpi.mir b/llvm/test/CodeGen/RISCV/fold-addi-loadstore-cpi.mir new file mode 100644 index 0000000000000..42cac83678cc7 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/fold-addi-loadstore-cpi.mir @@ -0,0 +1,76 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mattr=+m -mtriple=riscv64 -run-pass riscv-merge-base-offset %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" + target triple = "riscv64" + + define i64 @udiv64_constant_add(i64 %a) nounwind { + %1 = udiv i64 %a, 7 + ret i64 %1 + } +... 
+--- +name: udiv64_constant_add +alignment: 4 +liveins: + - { reg: '$x10', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + isCalleeSavedInfoValid: false + localFrameSize: 0 +fixedStack: [] +stack: [] +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: + - id: 0 + value: i64 2635249153387078803 + alignment: 8 + isTargetSpecific: false +body: | + bb.1 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: udiv64_constant_add + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[LUI:%[0-9]+]]:gpr = LUI target-flags(riscv-hi) %const.0 + ; CHECK-NEXT: [[LD:%[0-9]+]]:gpr = LD [[LUI]], target-flags(riscv-lo) %const.0 :: (load (s64) from constant-pool) + ; CHECK-NEXT: [[MULHU:%[0-9]+]]:gpr = MULHU [[COPY]], [[LD]] + ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY]], [[MULHU]] + ; CHECK-NEXT: [[SRLI:%[0-9]+]]:gpr = SRLI [[SUB]], 1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[SRLI]], [[MULHU]] + ; CHECK-NEXT: [[SRLI1:%[0-9]+]]:gpr = SRLI [[ADD]], 2 + ; CHECK-NEXT: $x10 = COPY [[SRLI1]] + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:gpr = COPY $x10 + %17:gpr = LUI target-flags(riscv-hi) %const.0 + %16:gpr = ADDI %17, target-flags(riscv-lo) %const.0 + %4:gpr = LD %16, 0 :: (load (s64) from constant-pool) + %8:gpr = MULHU %0, %4 + %9:gpr = SUB %0, %8 + %11:gpr = SRLI %9, 1 + %12:gpr = ADD %11, %8 + %13:gpr = SRLI %12, 2 + $x10 = COPY %13 + PseudoRET implicit $x10 +... 
diff --git a/llvm/test/CodeGen/RISCV/sfb-merge-base-offset.ll b/llvm/test/CodeGen/RISCV/sfb-merge-base-offset.ll new file mode 100644 index 0000000000000..2cd2d29e5e164 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/sfb-merge-base-offset.ll @@ -0,0 +1,493 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 + +; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+short-forward-branch-iload,+short-forward-branch-imm < %s \ +; RUN: | FileCheck -check-prefix=RV32I-WITH-SFB %s +; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+xqci,+short-forward-branch-iload,+short-forward-branch-imm < %s \ +; RUN: | FileCheck -check-prefix=XQCI-WITH-SFB %s + +; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+short-forward-branch-iload,+short-forward-branch-imm < %s \ +; RUN: | FileCheck -check-prefix=RV64I-WITH-SFB %s + + +@ga = global [16 x i8] zeroinitializer, align 8 + +;; This tests that we can fold offsets into the SFB Load Pseudos. + +define i64 @load_cond_ga_dword(i64 %a, i32 %b, i32 %c) { +; RV32I-WITH-SFB-LABEL: load_cond_ga_dword: +; RV32I-WITH-SFB: # %bb.0: +; RV32I-WITH-SFB-NEXT: lui a4, %hi(ga) +; RV32I-WITH-SFB-NEXT: addi a4, a4, %lo(ga) +; RV32I-WITH-SFB-NEXT: bne a2, a3, .LBB0_2 +; RV32I-WITH-SFB-NEXT: # %bb.1: +; RV32I-WITH-SFB-NEXT: lw a0, 8(a4) +; RV32I-WITH-SFB-NEXT: .LBB0_2: +; RV32I-WITH-SFB-NEXT: bne a2, a3, .LBB0_4 +; RV32I-WITH-SFB-NEXT: # %bb.3: +; RV32I-WITH-SFB-NEXT: lw a1, 12(a4) +; RV32I-WITH-SFB-NEXT: .LBB0_4: +; RV32I-WITH-SFB-NEXT: ret +; +; XQCI-WITH-SFB-LABEL: load_cond_ga_dword: +; XQCI-WITH-SFB: # %bb.0: +; XQCI-WITH-SFB-NEXT: qc.e.li a4, ga +; XQCI-WITH-SFB-NEXT: bne a2, a3, .LBB0_2 +; XQCI-WITH-SFB-NEXT: # %bb.1: +; XQCI-WITH-SFB-NEXT: lw a0, 8(a4) +; XQCI-WITH-SFB-NEXT: .LBB0_2: +; XQCI-WITH-SFB-NEXT: bne a2, a3, .LBB0_4 +; XQCI-WITH-SFB-NEXT: # %bb.3: +; XQCI-WITH-SFB-NEXT: lw a1, 12(a4) +; XQCI-WITH-SFB-NEXT: .LBB0_4: +; XQCI-WITH-SFB-NEXT: ret +; +; RV64I-WITH-SFB-LABEL: 
load_cond_ga_dword: +; RV64I-WITH-SFB: # %bb.0: +; RV64I-WITH-SFB-NEXT: sext.w a2, a2 +; RV64I-WITH-SFB-NEXT: sext.w a1, a1 +; RV64I-WITH-SFB-NEXT: lui a3, %hi(ga+8) +; RV64I-WITH-SFB-NEXT: bne a1, a2, .LBB0_2 +; RV64I-WITH-SFB-NEXT: # %bb.1: +; RV64I-WITH-SFB-NEXT: ld a0, %lo(ga+8)(a3) +; RV64I-WITH-SFB-NEXT: .LBB0_2: +; RV64I-WITH-SFB-NEXT: ret + %addr = getelementptr i8, ptr @ga, i32 8 + %val = load i64, ptr %addr, align 8 + %cmp = icmp eq i32 %b, %c + %cmv = select i1 %cmp, i64 %val, i64 %a + ret i64 %cmv +} + +define i64 @load_cond_ga_word_sext(i64 %a, i32 %b, i32 %c) { +; RV32I-WITH-SFB-LABEL: load_cond_ga_word_sext: +; RV32I-WITH-SFB: # %bb.0: +; RV32I-WITH-SFB-NEXT: lui a4, %hi(ga+12) +; RV32I-WITH-SFB-NEXT: lw a4, %lo(ga+12)(a4) +; RV32I-WITH-SFB-NEXT: bne a2, a3, .LBB1_2 +; RV32I-WITH-SFB-NEXT: # %bb.1: +; RV32I-WITH-SFB-NEXT: mv a0, a4 +; RV32I-WITH-SFB-NEXT: .LBB1_2: +; RV32I-WITH-SFB-NEXT: bne a2, a3, .LBB1_4 +; RV32I-WITH-SFB-NEXT: # %bb.3: +; RV32I-WITH-SFB-NEXT: srai a1, a4, 31 +; RV32I-WITH-SFB-NEXT: .LBB1_4: +; RV32I-WITH-SFB-NEXT: ret +; +; XQCI-WITH-SFB-LABEL: load_cond_ga_word_sext: +; XQCI-WITH-SFB: # %bb.0: +; XQCI-WITH-SFB-NEXT: qc.e.li a4, ga+12 +; XQCI-WITH-SFB-NEXT: lw a4, 0(a4) +; XQCI-WITH-SFB-NEXT: qc.mveq a0, a2, a3, a4 +; XQCI-WITH-SFB-NEXT: bne a2, a3, .LBB1_2 +; XQCI-WITH-SFB-NEXT: # %bb.1: +; XQCI-WITH-SFB-NEXT: srai a1, a4, 31 +; XQCI-WITH-SFB-NEXT: .LBB1_2: +; XQCI-WITH-SFB-NEXT: ret +; +; RV64I-WITH-SFB-LABEL: load_cond_ga_word_sext: +; RV64I-WITH-SFB: # %bb.0: +; RV64I-WITH-SFB-NEXT: sext.w a2, a2 +; RV64I-WITH-SFB-NEXT: sext.w a1, a1 +; RV64I-WITH-SFB-NEXT: lui a3, %hi(ga+12) +; RV64I-WITH-SFB-NEXT: bne a1, a2, .LBB1_2 +; RV64I-WITH-SFB-NEXT: # %bb.1: +; RV64I-WITH-SFB-NEXT: lw a0, %lo(ga+12)(a3) +; RV64I-WITH-SFB-NEXT: .LBB1_2: +; RV64I-WITH-SFB-NEXT: ret + %addr = getelementptr i8, ptr @ga, i32 12 + %val = load i32, ptr %addr, align 4 + %ext = sext i32 %val to i64 + %cmp = icmp eq i32 %b, %c + %cmv = select i1 %cmp, i64 
%ext, i64 %a + ret i64 %cmv +} + +define i64 @load_cond_ga_word_zext(i64 %a, i32 %b, i32 %c) { +; RV32I-WITH-SFB-LABEL: load_cond_ga_word_zext: +; RV32I-WITH-SFB: # %bb.0: +; RV32I-WITH-SFB-NEXT: lui a4, %hi(ga+12) +; RV32I-WITH-SFB-NEXT: bne a2, a3, .LBB2_2 +; RV32I-WITH-SFB-NEXT: # %bb.1: +; RV32I-WITH-SFB-NEXT: lw a0, %lo(ga+12)(a4) +; RV32I-WITH-SFB-NEXT: .LBB2_2: +; RV32I-WITH-SFB-NEXT: bne a2, a3, .LBB2_4 +; RV32I-WITH-SFB-NEXT: # %bb.3: +; RV32I-WITH-SFB-NEXT: li a1, 0 +; RV32I-WITH-SFB-NEXT: .LBB2_4: +; RV32I-WITH-SFB-NEXT: ret +; +; XQCI-WITH-SFB-LABEL: load_cond_ga_word_zext: +; XQCI-WITH-SFB: # %bb.0: +; XQCI-WITH-SFB-NEXT: qc.e.li a4, ga+12 +; XQCI-WITH-SFB-NEXT: bne a2, a3, .LBB2_2 +; XQCI-WITH-SFB-NEXT: # %bb.1: +; XQCI-WITH-SFB-NEXT: lw a0, 0(a4) +; XQCI-WITH-SFB-NEXT: .LBB2_2: +; XQCI-WITH-SFB-NEXT: bne a2, a3, .LBB2_4 +; XQCI-WITH-SFB-NEXT: # %bb.3: +; XQCI-WITH-SFB-NEXT: li a1, 0 +; XQCI-WITH-SFB-NEXT: .LBB2_4: +; XQCI-WITH-SFB-NEXT: ret +; +; RV64I-WITH-SFB-LABEL: load_cond_ga_word_zext: +; RV64I-WITH-SFB: # %bb.0: +; RV64I-WITH-SFB-NEXT: sext.w a2, a2 +; RV64I-WITH-SFB-NEXT: sext.w a1, a1 +; RV64I-WITH-SFB-NEXT: lui a3, %hi(ga+12) +; RV64I-WITH-SFB-NEXT: bne a1, a2, .LBB2_2 +; RV64I-WITH-SFB-NEXT: # %bb.1: +; RV64I-WITH-SFB-NEXT: lwu a0, %lo(ga+12)(a3) +; RV64I-WITH-SFB-NEXT: .LBB2_2: +; RV64I-WITH-SFB-NEXT: ret + %addr = getelementptr i8, ptr @ga, i32 12 + %val = load i32, ptr %addr, align 4 + %ext = zext i32 %val to i64 + %cmp = icmp eq i32 %b, %c + %cmv = select i1 %cmp, i64 %ext, i64 %a + ret i64 %cmv +} + +define i32 @load_cond_ga_word(i32 %a, i32 %b, i32 %c) { +; RV32I-WITH-SFB-LABEL: load_cond_ga_word: +; RV32I-WITH-SFB: # %bb.0: +; RV32I-WITH-SFB-NEXT: lui a3, %hi(ga) +; RV32I-WITH-SFB-NEXT: bne a1, a2, .LBB3_2 +; RV32I-WITH-SFB-NEXT: # %bb.1: +; RV32I-WITH-SFB-NEXT: lw a0, %lo(ga+4)(a3) +; RV32I-WITH-SFB-NEXT: .LBB3_2: +; RV32I-WITH-SFB-NEXT: ret +; +; XQCI-WITH-SFB-LABEL: load_cond_ga_word: +; XQCI-WITH-SFB: # %bb.0: +; 
XQCI-WITH-SFB-NEXT: qc.e.li a3, ga+4 +; XQCI-WITH-SFB-NEXT: bne a1, a2, .LBB3_2 +; XQCI-WITH-SFB-NEXT: # %bb.1: +; XQCI-WITH-SFB-NEXT: lw a0, 0(a3) +; XQCI-WITH-SFB-NEXT: .LBB3_2: +; XQCI-WITH-SFB-NEXT: ret +; +; RV64I-WITH-SFB-LABEL: load_cond_ga_word: +; RV64I-WITH-SFB: # %bb.0: +; RV64I-WITH-SFB-NEXT: sext.w a2, a2 +; RV64I-WITH-SFB-NEXT: sext.w a1, a1 +; RV64I-WITH-SFB-NEXT: lui a3, %hi(ga) +; RV64I-WITH-SFB-NEXT: bne a1, a2, .LBB3_2 +; RV64I-WITH-SFB-NEXT: # %bb.1: +; RV64I-WITH-SFB-NEXT: lw a0, %lo(ga+4)(a3) +; RV64I-WITH-SFB-NEXT: .LBB3_2: +; RV64I-WITH-SFB-NEXT: ret + %addr = getelementptr i8, ptr @ga, i32 4 + %val = load i32, ptr %addr, align 4 + %cmp = icmp eq i32 %b, %c + %cmv = select i1 %cmp, i32 %val, i32 %a + ret i32 %cmv +} + +define i32 @load_cond_ga_half_sext(i32 %a, i32 %b, i32 %c) { +; RV32I-WITH-SFB-LABEL: load_cond_ga_half_sext: +; RV32I-WITH-SFB: # %bb.0: +; RV32I-WITH-SFB-NEXT: lui a3, %hi(ga) +; RV32I-WITH-SFB-NEXT: bne a1, a2, .LBB4_2 +; RV32I-WITH-SFB-NEXT: # %bb.1: +; RV32I-WITH-SFB-NEXT: lh a0, %lo(ga+6)(a3) +; RV32I-WITH-SFB-NEXT: .LBB4_2: +; RV32I-WITH-SFB-NEXT: ret +; +; XQCI-WITH-SFB-LABEL: load_cond_ga_half_sext: +; XQCI-WITH-SFB: # %bb.0: +; XQCI-WITH-SFB-NEXT: qc.e.li a3, ga+6 +; XQCI-WITH-SFB-NEXT: bne a1, a2, .LBB4_2 +; XQCI-WITH-SFB-NEXT: # %bb.1: +; XQCI-WITH-SFB-NEXT: lh a0, 0(a3) +; XQCI-WITH-SFB-NEXT: .LBB4_2: +; XQCI-WITH-SFB-NEXT: ret +; +; RV64I-WITH-SFB-LABEL: load_cond_ga_half_sext: +; RV64I-WITH-SFB: # %bb.0: +; RV64I-WITH-SFB-NEXT: sext.w a2, a2 +; RV64I-WITH-SFB-NEXT: sext.w a1, a1 +; RV64I-WITH-SFB-NEXT: lui a3, %hi(ga) +; RV64I-WITH-SFB-NEXT: bne a1, a2, .LBB4_2 +; RV64I-WITH-SFB-NEXT: # %bb.1: +; RV64I-WITH-SFB-NEXT: lh a0, %lo(ga+6)(a3) +; RV64I-WITH-SFB-NEXT: .LBB4_2: +; RV64I-WITH-SFB-NEXT: ret + %addr = getelementptr i8, ptr @ga, i32 6 + %val = load i16, ptr %addr, align 2 + %ext = sext i16 %val to i32 + %cmp = icmp eq i32 %b, %c + %cmv = select i1 %cmp, i32 %ext, i32 %a + ret i32 %cmv +} + +define i32 
@load_cond_ga_half_zext(i32 %a, i32 %b, i32 %c) { +; RV32I-WITH-SFB-LABEL: load_cond_ga_half_zext: +; RV32I-WITH-SFB: # %bb.0: +; RV32I-WITH-SFB-NEXT: lui a3, %hi(ga) +; RV32I-WITH-SFB-NEXT: bne a1, a2, .LBB5_2 +; RV32I-WITH-SFB-NEXT: # %bb.1: +; RV32I-WITH-SFB-NEXT: lhu a0, %lo(ga+6)(a3) +; RV32I-WITH-SFB-NEXT: .LBB5_2: +; RV32I-WITH-SFB-NEXT: ret +; +; XQCI-WITH-SFB-LABEL: load_cond_ga_half_zext: +; XQCI-WITH-SFB: # %bb.0: +; XQCI-WITH-SFB-NEXT: qc.e.li a3, ga+6 +; XQCI-WITH-SFB-NEXT: bne a1, a2, .LBB5_2 +; XQCI-WITH-SFB-NEXT: # %bb.1: +; XQCI-WITH-SFB-NEXT: lhu a0, 0(a3) +; XQCI-WITH-SFB-NEXT: .LBB5_2: +; XQCI-WITH-SFB-NEXT: ret +; +; RV64I-WITH-SFB-LABEL: load_cond_ga_half_zext: +; RV64I-WITH-SFB: # %bb.0: +; RV64I-WITH-SFB-NEXT: sext.w a2, a2 +; RV64I-WITH-SFB-NEXT: sext.w a1, a1 +; RV64I-WITH-SFB-NEXT: lui a3, %hi(ga) +; RV64I-WITH-SFB-NEXT: bne a1, a2, .LBB5_2 +; RV64I-WITH-SFB-NEXT: # %bb.1: +; RV64I-WITH-SFB-NEXT: lhu a0, %lo(ga+6)(a3) +; RV64I-WITH-SFB-NEXT: .LBB5_2: +; RV64I-WITH-SFB-NEXT: ret + %addr = getelementptr i8, ptr @ga, i32 6 + %val = load i16, ptr %addr, align 2 + %ext = zext i16 %val to i32 + %cmp = icmp eq i32 %b, %c + %cmv = select i1 %cmp, i32 %ext, i32 %a + ret i32 %cmv +} + +define i32 @load_cond_ga_byte_sext(i32 %a, i32 %b, i32 %c) { +; RV32I-WITH-SFB-LABEL: load_cond_ga_byte_sext: +; RV32I-WITH-SFB: # %bb.0: +; RV32I-WITH-SFB-NEXT: lui a3, %hi(ga) +; RV32I-WITH-SFB-NEXT: bne a1, a2, .LBB6_2 +; RV32I-WITH-SFB-NEXT: # %bb.1: +; RV32I-WITH-SFB-NEXT: lb a0, %lo(ga+7)(a3) +; RV32I-WITH-SFB-NEXT: .LBB6_2: +; RV32I-WITH-SFB-NEXT: ret +; +; XQCI-WITH-SFB-LABEL: load_cond_ga_byte_sext: +; XQCI-WITH-SFB: # %bb.0: +; XQCI-WITH-SFB-NEXT: qc.e.li a3, ga+7 +; XQCI-WITH-SFB-NEXT: bne a1, a2, .LBB6_2 +; XQCI-WITH-SFB-NEXT: # %bb.1: +; XQCI-WITH-SFB-NEXT: lb a0, 0(a3) +; XQCI-WITH-SFB-NEXT: .LBB6_2: +; XQCI-WITH-SFB-NEXT: ret +; +; RV64I-WITH-SFB-LABEL: load_cond_ga_byte_sext: +; RV64I-WITH-SFB: # %bb.0: +; RV64I-WITH-SFB-NEXT: sext.w a2, a2 +; 
RV64I-WITH-SFB-NEXT: sext.w a1, a1 +; RV64I-WITH-SFB-NEXT: lui a3, %hi(ga) +; RV64I-WITH-SFB-NEXT: bne a1, a2, .LBB6_2 +; RV64I-WITH-SFB-NEXT: # %bb.1: +; RV64I-WITH-SFB-NEXT: lb a0, %lo(ga+7)(a3) +; RV64I-WITH-SFB-NEXT: .LBB6_2: +; RV64I-WITH-SFB-NEXT: ret + %addr = getelementptr i8, ptr @ga, i32 7 + %val = load i8, ptr %addr, align 1 + %ext = sext i8 %val to i32 + %cmp = icmp eq i32 %b, %c + %cmv = select i1 %cmp, i32 %ext, i32 %a + ret i32 %cmv +} + +define i32 @load_cond_ga_byte_zext(i32 %a, i32 %b, i32 %c) { +; RV32I-WITH-SFB-LABEL: load_cond_ga_byte_zext: +; RV32I-WITH-SFB: # %bb.0: +; RV32I-WITH-SFB-NEXT: lui a3, %hi(ga) +; RV32I-WITH-SFB-NEXT: bne a1, a2, .LBB7_2 +; RV32I-WITH-SFB-NEXT: # %bb.1: +; RV32I-WITH-SFB-NEXT: lbu a0, %lo(ga+7)(a3) +; RV32I-WITH-SFB-NEXT: .LBB7_2: +; RV32I-WITH-SFB-NEXT: ret +; +; XQCI-WITH-SFB-LABEL: load_cond_ga_byte_zext: +; XQCI-WITH-SFB: # %bb.0: +; XQCI-WITH-SFB-NEXT: qc.e.li a3, ga+7 +; XQCI-WITH-SFB-NEXT: bne a1, a2, .LBB7_2 +; XQCI-WITH-SFB-NEXT: # %bb.1: +; XQCI-WITH-SFB-NEXT: lbu a0, 0(a3) +; XQCI-WITH-SFB-NEXT: .LBB7_2: +; XQCI-WITH-SFB-NEXT: ret +; +; RV64I-WITH-SFB-LABEL: load_cond_ga_byte_zext: +; RV64I-WITH-SFB: # %bb.0: +; RV64I-WITH-SFB-NEXT: sext.w a2, a2 +; RV64I-WITH-SFB-NEXT: sext.w a1, a1 +; RV64I-WITH-SFB-NEXT: lui a3, %hi(ga) +; RV64I-WITH-SFB-NEXT: bne a1, a2, .LBB7_2 +; RV64I-WITH-SFB-NEXT: # %bb.1: +; RV64I-WITH-SFB-NEXT: lbu a0, %lo(ga+7)(a3) +; RV64I-WITH-SFB-NEXT: .LBB7_2: +; RV64I-WITH-SFB-NEXT: ret + %addr = getelementptr i8, ptr @ga, i32 7 + %val = load i8, ptr %addr, align 1 + %ext = zext i8 %val to i32 + %cmp = icmp eq i32 %b, %c + %cmv = select i1 %cmp, i32 %ext, i32 %a + ret i32 %cmv +} + +;; Check we handle cases where the branch is with an immediate + +define i32 @load_cond_ga_word_imm(i32 %a, i32 %b) { +; RV32I-WITH-SFB-LABEL: load_cond_ga_word_imm: +; RV32I-WITH-SFB: # %bb.0: +; RV32I-WITH-SFB-NEXT: lui a2, %hi(ga) +; RV32I-WITH-SFB-NEXT: li a3, 14 +; RV32I-WITH-SFB-NEXT: bne a1, a3, 
.LBB8_2 +; RV32I-WITH-SFB-NEXT: # %bb.1: +; RV32I-WITH-SFB-NEXT: lw a0, %lo(ga+4)(a2) +; RV32I-WITH-SFB-NEXT: .LBB8_2: +; RV32I-WITH-SFB-NEXT: ret +; +; XQCI-WITH-SFB-LABEL: load_cond_ga_word_imm: +; XQCI-WITH-SFB: # %bb.0: +; XQCI-WITH-SFB-NEXT: qc.e.li a2, ga+4 +; XQCI-WITH-SFB-NEXT: qc.bnei a1, 14, .LBB8_2 +; XQCI-WITH-SFB-NEXT: # %bb.1: +; XQCI-WITH-SFB-NEXT: lw a0, 0(a2) +; XQCI-WITH-SFB-NEXT: .LBB8_2: +; XQCI-WITH-SFB-NEXT: ret +; +; RV64I-WITH-SFB-LABEL: load_cond_ga_word_imm: +; RV64I-WITH-SFB: # %bb.0: +; RV64I-WITH-SFB-NEXT: sext.w a1, a1 +; RV64I-WITH-SFB-NEXT: lui a2, %hi(ga) +; RV64I-WITH-SFB-NEXT: li a3, 14 +; RV64I-WITH-SFB-NEXT: bne a1, a3, .LBB8_2 +; RV64I-WITH-SFB-NEXT: # %bb.1: +; RV64I-WITH-SFB-NEXT: lw a0, %lo(ga+4)(a2) +; RV64I-WITH-SFB-NEXT: .LBB8_2: +; RV64I-WITH-SFB-NEXT: ret + %addr = getelementptr i8, ptr @ga, i32 4 + %val = load i32, ptr %addr, align 4 + %cmp = icmp eq i32 %b, 14 + %cmv = select i1 %cmp, i32 %val, i32 %a + ret i32 %cmv +} + +;; Negative Tests + + +define i32 @load_cond_ga_word_addr_cond(i32 %a, ptr %b) { +; RV32I-WITH-SFB-LABEL: load_cond_ga_word_addr_cond: +; RV32I-WITH-SFB: # %bb.0: +; RV32I-WITH-SFB-NEXT: lui a2, %hi(ga) +; RV32I-WITH-SFB-NEXT: addi a2, a2, %lo(ga) +; RV32I-WITH-SFB-NEXT: addi a3, a2, 4 +; RV32I-WITH-SFB-NEXT: beq a3, a1, .LBB9_2 +; RV32I-WITH-SFB-NEXT: # %bb.1: +; RV32I-WITH-SFB-NEXT: lw a0, 4(a2) +; RV32I-WITH-SFB-NEXT: .LBB9_2: +; RV32I-WITH-SFB-NEXT: ret +; +; XQCI-WITH-SFB-LABEL: load_cond_ga_word_addr_cond: +; XQCI-WITH-SFB: # %bb.0: +; XQCI-WITH-SFB-NEXT: qc.e.li a2, ga +; XQCI-WITH-SFB-NEXT: addi a3, a2, 4 +; XQCI-WITH-SFB-NEXT: beq a3, a1, .LBB9_2 +; XQCI-WITH-SFB-NEXT: # %bb.1: +; XQCI-WITH-SFB-NEXT: lw a0, 4(a2) +; XQCI-WITH-SFB-NEXT: .LBB9_2: +; XQCI-WITH-SFB-NEXT: ret +; +; RV64I-WITH-SFB-LABEL: load_cond_ga_word_addr_cond: +; RV64I-WITH-SFB: # %bb.0: +; RV64I-WITH-SFB-NEXT: lui a2, %hi(ga) +; RV64I-WITH-SFB-NEXT: addi a2, a2, %lo(ga) +; RV64I-WITH-SFB-NEXT: addi a3, a2, 4 +; 
RV64I-WITH-SFB-NEXT: beq a3, a1, .LBB9_2 +; RV64I-WITH-SFB-NEXT: # %bb.1: +; RV64I-WITH-SFB-NEXT: lw a0, 4(a2) +; RV64I-WITH-SFB-NEXT: .LBB9_2: +; RV64I-WITH-SFB-NEXT: ret + %addr = getelementptr i8, ptr @ga, i32 4 + %val = load i32, ptr %addr, align 4 + %cmp = icmp ne ptr %addr, %b + %cmv = select i1 %cmp, i32 %val, i32 %a + ret i32 %cmv +} + +define i32 @load_cond_ga_word_addr_cond_imm(i32 %a) { +; RV32I-WITH-SFB-LABEL: load_cond_ga_word_addr_cond_imm: +; RV32I-WITH-SFB: # %bb.0: +; RV32I-WITH-SFB-NEXT: lui a1, %hi(ga) +; RV32I-WITH-SFB-NEXT: addi a1, a1, %lo(ga) +; RV32I-WITH-SFB-NEXT: addi a2, a1, 4 +; RV32I-WITH-SFB-NEXT: beqz a2, .LBB10_2 +; RV32I-WITH-SFB-NEXT: # %bb.1: +; RV32I-WITH-SFB-NEXT: lw a0, 4(a1) +; RV32I-WITH-SFB-NEXT: .LBB10_2: +; RV32I-WITH-SFB-NEXT: ret +; +; XQCI-WITH-SFB-LABEL: load_cond_ga_word_addr_cond_imm: +; XQCI-WITH-SFB: # %bb.0: +; XQCI-WITH-SFB-NEXT: qc.e.li a1, ga +; XQCI-WITH-SFB-NEXT: addi a2, a1, 4 +; XQCI-WITH-SFB-NEXT: beqz a2, .LBB10_2 +; XQCI-WITH-SFB-NEXT: # %bb.1: +; XQCI-WITH-SFB-NEXT: lw a0, 4(a1) +; XQCI-WITH-SFB-NEXT: .LBB10_2: +; XQCI-WITH-SFB-NEXT: ret +; +; RV64I-WITH-SFB-LABEL: load_cond_ga_word_addr_cond_imm: +; RV64I-WITH-SFB: # %bb.0: +; RV64I-WITH-SFB-NEXT: lui a1, %hi(ga) +; RV64I-WITH-SFB-NEXT: addi a1, a1, %lo(ga) +; RV64I-WITH-SFB-NEXT: addi a2, a1, 4 +; RV64I-WITH-SFB-NEXT: beqz a2, .LBB10_2 +; RV64I-WITH-SFB-NEXT: # %bb.1: +; RV64I-WITH-SFB-NEXT: lw a0, 4(a1) +; RV64I-WITH-SFB-NEXT: .LBB10_2: +; RV64I-WITH-SFB-NEXT: ret + %addr = getelementptr i8, ptr @ga, i32 4 + %val = load i32, ptr %addr, align 4 + %cmp = icmp ne ptr %addr, null + %cmv = select i1 %cmp, i32 %val, i32 %a + ret i32 %cmv +} + +define i32 @load_cond_ga_word_addr_false(i32 %a, i32 %b) { +; RV32I-WITH-SFB-LABEL: load_cond_ga_word_addr_false: +; RV32I-WITH-SFB: # %bb.0: +; RV32I-WITH-SFB-NEXT: lui a2, %hi(ga) +; RV32I-WITH-SFB-NEXT: addi a2, a2, %lo(ga) +; RV32I-WITH-SFB-NEXT: beq a0, a1, .LBB11_2 +; RV32I-WITH-SFB-NEXT: # %bb.1: +; 
RV32I-WITH-SFB-NEXT: lw a2, 4(a2) +; RV32I-WITH-SFB-NEXT: .LBB11_2: +; RV32I-WITH-SFB-NEXT: mv a0, a2 +; RV32I-WITH-SFB-NEXT: ret +; +; XQCI-WITH-SFB-LABEL: load_cond_ga_word_addr_false: +; XQCI-WITH-SFB: # %bb.0: +; XQCI-WITH-SFB-NEXT: qc.e.li a2, ga +; XQCI-WITH-SFB-NEXT: beq a0, a1, .LBB11_2 +; XQCI-WITH-SFB-NEXT: # %bb.1: +; XQCI-WITH-SFB-NEXT: lw a2, 4(a2) +; XQCI-WITH-SFB-NEXT: .LBB11_2: +; XQCI-WITH-SFB-NEXT: mv a0, a2 +; XQCI-WITH-SFB-NEXT: ret +; +; RV64I-WITH-SFB-LABEL: load_cond_ga_word_addr_false: +; RV64I-WITH-SFB: # %bb.0: +; RV64I-WITH-SFB-NEXT: sext.w a1, a1 +; RV64I-WITH-SFB-NEXT: sext.w a2, a0 +; RV64I-WITH-SFB-NEXT: lui a0, %hi(ga) +; RV64I-WITH-SFB-NEXT: addi a0, a0, %lo(ga) +; RV64I-WITH-SFB-NEXT: beq a2, a1, .LBB11_2 +; RV64I-WITH-SFB-NEXT: # %bb.1: +; RV64I-WITH-SFB-NEXT: lw a0, 4(a0) +; RV64I-WITH-SFB-NEXT: .LBB11_2: +; RV64I-WITH-SFB-NEXT: ret + %addr = getelementptr i8, ptr @ga, i32 4 + %val = load i32, ptr %addr, align 4 + %cmp = icmp ne i32 %a, %b + %addr_int = ptrtoint ptr @ga to i32 + %cmv = select i1 %cmp, i32 %val, i32 %addr_int + ret i32 %cmv +}