diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp index 053caf518bd1f..7a3bc6c2043f4 100644 --- a/llvm/lib/CodeGen/TypePromotion.cpp +++ b/llvm/lib/CodeGen/TypePromotion.cpp @@ -937,6 +937,8 @@ bool TypePromotionImpl::run(Function &F, const TargetMachine *TM, return 0; EVT PromotedVT = TLI->getTypeToTransformTo(*Ctx, SrcVT); + if (TLI->isSExtCheaperThanZExt(SrcVT, PromotedVT)) + return 0; if (RegisterBitWidth < PromotedVT.getFixedSizeInBits()) { LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register " << "for promoted type\n"); diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 4c3da3ad31116..adef40e19cba4 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -366,6 +366,7 @@ class RISCVPassConfig : public TargetPassConfig { void addIRPasses() override; bool addPreISel() override; + void addCodeGenPrepare() override; bool addInstSelector() override; bool addIRTranslator() override; void addPreLegalizeMachineIR() override; @@ -452,6 +453,12 @@ bool RISCVPassConfig::addPreISel() { return false; } +void RISCVPassConfig::addCodeGenPrepare() { + if (getOptLevel() != CodeGenOptLevel::None) + addPass(createTypePromotionLegacyPass()); + TargetPassConfig::addCodeGenPrepare(); +} + bool RISCVPassConfig::addInstSelector() { addPass(createRISCVISelDag(getRISCVTargetMachine(), getOptLevel())); diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index e7db8ef9d5aff..364c1e430b915 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -68,6 +68,7 @@ ; CHECK-NEXT: Expand reduction intrinsics ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: TLS Variable Hoist +; CHECK-NEXT: Type Promotion ; CHECK-NEXT: CodeGen Prepare ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Exception handling preparation diff --git a/llvm/test/CodeGen/RISCV/lack-of-signed-truncation-check.ll b/llvm/test/CodeGen/RISCV/lack-of-signed-truncation-check.ll index 9e7f2e9525d3b..6e3a50542939f 100644 --- a/llvm/test/CodeGen/RISCV/lack-of-signed-truncation-check.ll +++ b/llvm/test/CodeGen/RISCV/lack-of-signed-truncation-check.ll @@ -254,21 +254,39 @@ define i1 @shifts_necmp_i64_i8(i64 %x) nounwind { ; ---------------------------------------------------------------------------- ; define i1 @add_ultcmp_i16_i8(i16 %x) nounwind { -; RV32-LABEL: add_ultcmp_i16_i8: -; RV32: # %bb.0: -; RV32-NEXT: addi a0, a0, -128 -; RV32-NEXT: slli a0, a0, 16 -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: sltiu a0, a0, 255 -; RV32-NEXT: ret +; RV32I-LABEL: add_ultcmp_i16_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srli a0, a0, 16 +; RV32I-NEXT: addi a0, a0, -128 +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: sltiu a0, a0, 255 +; RV32I-NEXT: ret ; -; RV64-LABEL: add_ultcmp_i16_i8: -; RV64: # %bb.0: -; RV64-NEXT: addi a0, a0, -128 -; RV64-NEXT: slli a0, a0, 48 -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: sltiu a0, a0, 255 -; RV64-NEXT: ret +; RV64I-LABEL: add_ultcmp_i16_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srli a0, a0, 48 +; RV64I-NEXT: addi a0, a0, -128 +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: sltiu a0, a0, 255 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: add_ultcmp_i16_i8: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: zext.h a0, a0 +; RV32ZBB-NEXT: addi a0, a0, -128 +; RV32ZBB-NEXT: srli a0, a0, 8 +; RV32ZBB-NEXT: sltiu a0, a0, 255 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: add_ultcmp_i16_i8: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: zext.h a0, a0 +; RV64ZBB-NEXT: addi a0, a0, -128 +; RV64ZBB-NEXT: srli a0, a0, 8 +; RV64ZBB-NEXT: sltiu a0, a0, 255 +; RV64ZBB-NEXT: ret %tmp0 = add i16 %x, -128 ; ~0U << (8-1) %tmp1 = icmp ult i16 %tmp0, -256 ; ~0U << 8 ret i1 %tmp1 @@ -421,21 +439,39 @@ define i1 @add_ultcmp_i64_i8(i64 %x) nounwind { ; Slightly more canonical variant define i1 @add_ulecmp_i16_i8(i16 %x) nounwind { -; RV32-LABEL: add_ulecmp_i16_i8: -; RV32: # %bb.0: -; RV32-NEXT: addi a0, a0, -128 -; RV32-NEXT: slli a0, a0, 16 -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: sltiu a0, a0, 255 -; RV32-NEXT: ret +; RV32I-LABEL: add_ulecmp_i16_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srli a0, a0, 16 +; RV32I-NEXT: addi a0, a0, -128 +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: sltiu a0, a0, 255 +; RV32I-NEXT: ret ; -; RV64-LABEL: add_ulecmp_i16_i8: -; RV64: # %bb.0: -; RV64-NEXT: addi a0, a0, -128 -; RV64-NEXT: slli a0, a0, 48 -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: sltiu a0, a0, 255 -; RV64-NEXT: ret +; RV64I-LABEL: add_ulecmp_i16_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srli a0, a0, 48 +; RV64I-NEXT: addi a0, a0, -128 +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: sltiu a0, a0, 255 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: add_ulecmp_i16_i8: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: zext.h a0, a0 +; RV32ZBB-NEXT: addi a0, a0, -128 +; RV32ZBB-NEXT: srli a0, a0, 8 +; RV32ZBB-NEXT: sltiu a0, a0, 255 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: add_ulecmp_i16_i8: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: zext.h a0, a0 +; RV64ZBB-NEXT: addi a0, a0, -128 +; RV64ZBB-NEXT: srli a0, a0, 8 +; RV64ZBB-NEXT: sltiu a0, a0, 255 +; RV64ZBB-NEXT: ret %tmp0 = add i16 %x, -128 ; ~0U << (8-1) %tmp1 = icmp ule i16 %tmp0, -257 ; ~0U << 8 - 1 ret i1 %tmp1 diff --git a/llvm/test/CodeGen/RISCV/signbit-test.ll b/llvm/test/CodeGen/RISCV/signbit-test.ll index 69a9026d9af9e..4e10fae06d886 100644 --- a/llvm/test/CodeGen/RISCV/signbit-test.ll +++ b/llvm/test/CodeGen/RISCV/signbit-test.ll @@ -303,7 +303,10 @@ define i16 @test_clear_mask_i16_i8(i16 %x) nounwind { ; RV32-NEXT: bnez a1, .LBB10_2 ; RV32-NEXT: # %bb.1: # %t ; RV32-NEXT: li a0, 42 -; RV32-NEXT: .LBB10_2: # %f +; RV32-NEXT: ret +; RV32-NEXT: .LBB10_2: +; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: srli a0, a0, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: test_clear_mask_i16_i8: @@ -312,7 +315,10 @@ define i16 @test_clear_mask_i16_i8(i16 %x) nounwind { ; RV64-NEXT: bnez a1, .LBB10_2 ; RV64-NEXT: # %bb.1: # %t ; RV64-NEXT: li a0, 42 -; RV64-NEXT: .LBB10_2: # %f +; RV64-NEXT: ret +; RV64-NEXT: .LBB10_2: +; RV64-NEXT: slli a0, a0, 48 +; RV64-NEXT: srli a0, a0, 48 ; RV64-NEXT: ret entry: %a = and i16 %x, 128 @@ -332,7 +338,10 @@ define i16 @test_set_mask_i16_i8(i16 %x) nounwind { ; RV32-NEXT: beqz a1, .LBB11_2 ; RV32-NEXT: # %bb.1: # %t ; RV32-NEXT: li a0, 42 -; RV32-NEXT: .LBB11_2: # %f +; RV32-NEXT: ret +; RV32-NEXT: .LBB11_2: +; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: srli a0, a0, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: test_set_mask_i16_i8: @@ -341,7 +350,10 @@ define i16 @test_set_mask_i16_i8(i16 %x) nounwind { ; RV64-NEXT: beqz a1, .LBB11_2 ; RV64-NEXT: # %bb.1: # %t ; RV64-NEXT: li a0, 42 -; RV64-NEXT: .LBB11_2: # %f +; RV64-NEXT: ret +; RV64-NEXT: .LBB11_2: +; RV64-NEXT: slli a0, a0, 48 +; RV64-NEXT: srli a0, a0, 48 ; RV64-NEXT: ret entry: %a = and i16 %x, 128 @@ -361,7 +373,10 @@ define i16 @test_set_mask_i16_i7(i16 %x) nounwind { ; RV32-NEXT: beqz a1, .LBB12_2 ; RV32-NEXT: # %bb.1: # %t ; RV32-NEXT: li a0, 42 -; RV32-NEXT: .LBB12_2: # %f +; RV32-NEXT: ret +; RV32-NEXT: .LBB12_2: +; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: srli a0, a0, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: test_set_mask_i16_i7: @@ -370,7 +385,10 @@ define i16 @test_set_mask_i16_i7(i16 %x) nounwind { ; RV64-NEXT: beqz a1, .LBB12_2 ; RV64-NEXT: # %bb.1: # %t ; RV64-NEXT: li a0, 42 -; RV64-NEXT: .LBB12_2: # %f +; RV64-NEXT: ret +; RV64-NEXT: .LBB12_2: +; RV64-NEXT: slli a0, a0, 48 +; RV64-NEXT: srli a0, a0, 48 ; RV64-NEXT: ret entry: %a = and i16 %x, 64 diff --git a/llvm/test/CodeGen/RISCV/signed-truncation-check.ll b/llvm/test/CodeGen/RISCV/signed-truncation-check.ll index 0860853ae9c0a..de36bcdb91060 100644 --- a/llvm/test/CodeGen/RISCV/signed-truncation-check.ll +++ b/llvm/test/CodeGen/RISCV/signed-truncation-check.ll @@ -254,23 +254,43 @@ define i1 @shifts_eqcmp_i64_i8(i64 %x) nounwind { ; ---------------------------------------------------------------------------- ; define i1 @add_ugecmp_i16_i8(i16 %x) nounwind { -; RV32-LABEL: add_ugecmp_i16_i8: -; RV32: # %bb.0: -; RV32-NEXT: addi a0, a0, -128 -; RV32-NEXT: slli a0, a0, 16 -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: sltiu a0, a0, 255 -; RV32-NEXT: xori a0, a0, 1 -; RV32-NEXT: ret +; RV32I-LABEL: add_ugecmp_i16_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srli a0, a0, 16 +; RV32I-NEXT: addi a0, a0, -128 +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: sltiu a0, a0, 255 +; RV32I-NEXT: xori a0, a0, 1 +; RV32I-NEXT: ret ; -; RV64-LABEL: add_ugecmp_i16_i8: -; RV64: # %bb.0: -; RV64-NEXT: addi a0, a0, -128 -; RV64-NEXT: slli a0, a0, 48 -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: sltiu a0, a0, 255 -; RV64-NEXT: xori a0, a0, 1 -; RV64-NEXT: ret +; RV64I-LABEL: add_ugecmp_i16_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srli a0, a0, 48 +; RV64I-NEXT: addi a0, a0, -128 +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: sltiu a0, a0, 255 +; RV64I-NEXT: xori a0, a0, 1 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: add_ugecmp_i16_i8: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: zext.h a0, a0 +; RV32ZBB-NEXT: addi a0, a0, -128 +; RV32ZBB-NEXT: srli a0, a0, 8 +; RV32ZBB-NEXT: sltiu a0, a0, 255 +; RV32ZBB-NEXT: xori a0, a0, 1 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: add_ugecmp_i16_i8: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: zext.h a0, a0 +; RV64ZBB-NEXT: addi a0, a0, -128 +; RV64ZBB-NEXT: srli a0, a0, 8 +; RV64ZBB-NEXT: sltiu a0, a0, 255 +; RV64ZBB-NEXT: xori a0, a0, 1 +; RV64ZBB-NEXT: ret %tmp0 = add i16 %x, -128 ; ~0U << (8-1) %tmp1 = icmp uge i16 %tmp0, -256 ; ~0U << 8 ret i1 %tmp1 @@ -471,23 +491,43 @@ define i1 @add_ugecmp_i64_i8(i64 %x) nounwind { ; Slightly more canonical variant define i1 @add_ugtcmp_i16_i8(i16 %x) nounwind { -; RV32-LABEL: add_ugtcmp_i16_i8: -; RV32: # %bb.0: -; RV32-NEXT: addi a0, a0, -128 -; RV32-NEXT: slli a0, a0, 16 -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: sltiu a0, a0, 255 -; RV32-NEXT: xori a0, a0, 1 -; RV32-NEXT: ret +; RV32I-LABEL: add_ugtcmp_i16_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srli a0, a0, 16 +; RV32I-NEXT: addi a0, a0, -128 +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: sltiu a0, a0, 255 +; RV32I-NEXT: xori a0, a0, 1 +; RV32I-NEXT: ret ; -; RV64-LABEL: add_ugtcmp_i16_i8: -; RV64: # %bb.0: -; RV64-NEXT: addi a0, a0, -128 -; RV64-NEXT: slli a0, a0, 48 -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: sltiu a0, a0, 255 -; RV64-NEXT: xori a0, a0, 1 -; RV64-NEXT: ret +; RV64I-LABEL: add_ugtcmp_i16_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srli a0, a0, 48 +; RV64I-NEXT: addi a0, a0, -128 +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: sltiu a0, a0, 255 +; RV64I-NEXT: xori a0, a0, 1 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: add_ugtcmp_i16_i8: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: zext.h a0, a0 +; RV32ZBB-NEXT: addi a0, a0, -128 +; RV32ZBB-NEXT: srli a0, a0, 8 +; RV32ZBB-NEXT: sltiu a0, a0, 255 +; RV32ZBB-NEXT: xori a0, a0, 1 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: add_ugtcmp_i16_i8: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: zext.h a0, a0 +; RV64ZBB-NEXT: addi a0, a0, -128 +; RV64ZBB-NEXT: srli a0, a0, 8 +; RV64ZBB-NEXT: sltiu a0, a0, 255 +; RV64ZBB-NEXT: xori a0, a0, 1 +; RV64ZBB-NEXT: ret %tmp0 = add i16 %x, -128 ; ~0U << (8-1) %tmp1 = icmp ugt i16 %tmp0, -257 ; ~0U << 8 - 1 ret i1 %tmp1 diff --git a/llvm/test/CodeGen/RISCV/typepromotion-overflow.ll b/llvm/test/CodeGen/RISCV/typepromotion-overflow.ll index fad9e6c0756b3..3740dc675949f 100644 --- a/llvm/test/CodeGen/RISCV/typepromotion-overflow.ll +++ b/llvm/test/CodeGen/RISCV/typepromotion-overflow.ll @@ -171,8 +171,7 @@ define i32 @safe_add_underflow(i8 zeroext %a) { define i32 @safe_add_underflow_neg(i8 zeroext %a) { ; CHECK-LABEL: safe_add_underflow_neg: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -2 -; CHECK-NEXT: andi a1, a0, 255 +; CHECK-NEXT: addi a1, a0, -2 ; CHECK-NEXT: li a2, 251 ; CHECK-NEXT: li a0, 8 ; CHECK-NEXT: bltu a1, a2, .LBB9_2 @@ -207,9 +206,8 @@ define i32 @overflow_sub_negative_const_limit(i8 zeroext %a) { define i32 @sext_sub_underflow(i8 zeroext %a) { ; CHECK-LABEL: sext_sub_underflow: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -6 -; CHECK-NEXT: andi a1, a0, 255 -; CHECK-NEXT: li a2, 250 +; CHECK-NEXT: addi a1, a0, -6 +; CHECK-NEXT: li a2, -6 ; CHECK-NEXT: li a0, 8 ; CHECK-NEXT: bltu a2, a1, .LBB11_2 ; CHECK-NEXT: # %bb.1: @@ -240,8 +238,7 @@ define i32 @safe_sub_underflow(i8 zeroext %a) { define i32 @safe_sub_underflow_neg(i8 zeroext %a) { ; CHECK-LABEL: safe_sub_underflow_neg: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -4 -; CHECK-NEXT: andi a1, a0, 255 +; CHECK-NEXT: addi a1, a0, -4 ; CHECK-NEXT: li a2, 250 ; CHECK-NEXT: li a0, 8 ; CHECK-NEXT: bltu a2, a1, .LBB13_2 @@ -259,9 +256,8 @@ define i32 @safe_sub_underflow_neg(i8 zeroext %a) { define i32 @sext_sub_underflow_neg(i8 zeroext %a) { ; CHECK-LABEL: sext_sub_underflow_neg: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -4 -; CHECK-NEXT: andi a1, a0, 255 -; CHECK-NEXT: li a2, 253 +; CHECK-NEXT: addi a1, a0, -4 +; CHECK-NEXT: li a2, -3 ; CHECK-NEXT: li a0, 8 ; CHECK-NEXT: bltu a1, a2, .LBB14_2 ; CHECK-NEXT: # %bb.1: @@ -322,15 +318,18 @@ define i8 @convert_add_order(i8 zeroext %arg) { ; CHECK-LABEL: convert_add_order: ; CHECK: # %bb.0: ; CHECK-NEXT: ori a1, a0, 1 -; CHECK-NEXT: sltiu a2, a1, 50 +; CHECK-NEXT: li a2, 50 +; CHECK-NEXT: bltu a1, a2, .LBB19_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 255 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB19_2: ; CHECK-NEXT: addi a1, a1, -40 -; CHECK-NEXT: andi a1, a1, 255 ; CHECK-NEXT: sltiu a1, a1, 20 -; CHECK-NEXT: li a3, 2 -; CHECK-NEXT: sub a3, a3, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: or a2, a2, a3 -; CHECK-NEXT: and a0, a2, a0 +; CHECK-NEXT: li a2, 2 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: ret %shl = or i8 %arg, 1 %cmp.0 = icmp ult i8 %shl, 50 @@ -348,9 +347,8 @@ define i8 @underflow_if_sub(i32 %arg, i8 zeroext %arg1) { ; CHECK-NEXT: sext.w a2, a0 ; CHECK-NEXT: sgtz a2, a2 ; CHECK-NEXT: and a0, a2, a0 -; CHECK-NEXT: addi a0, a0, -11 -; CHECK-NEXT: andi a2, a0, 247 -; CHECK-NEXT: bltu a2, a1, .LBB20_2 +; CHECK-NEXT: addi a0, a0, 245 +; CHECK-NEXT: bltu a0, a1, .LBB20_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 100 ; CHECK-NEXT: .LBB20_2: @@ -369,9 +367,10 @@ define i8 @underflow_if_sub_signext(i32 %arg, i8 signext %arg1) { ; CHECK-LABEL: underflow_if_sub_signext: ; CHECK: # %bb.0: ; CHECK-NEXT: sext.w a2, a0 +; CHECK-NEXT: andi a1, a1, 255 ; CHECK-NEXT: sgtz a2, a2 ; CHECK-NEXT: and a0, a2, a0 -; CHECK-NEXT: addi a0, a0, -11 +; CHECK-NEXT: addi a0, a0, 245 ; CHECK-NEXT: bltu a0, a1, .LBB21_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 100