From 6865dee726d0eeaf63094f27ad9c21e8c25da6c4 Mon Sep 17 00:00:00 2001 From: Yuanke Luo Date: Wed, 22 Oct 2025 11:49:42 +0800 Subject: [PATCH 1/4] [FastIsel] Get the right register type for call instruction When switching from FastISel to DAG ISel, the input value comes from an LLVM IR instruction. If that instruction is a call, we should take the calling convention of the call and pass it to RegsForValue, so that RegsForValue::getCopyFromRegs can deduce the right RegisterVT for the value returned by the callee. --- .../SelectionDAG/SelectionDAGBuilder.cpp | 7 ++++++- llvm/test/CodeGen/X86/bf16-fast-isel.ll | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/X86/bf16-fast-isel.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 20a0efd3afa1c..14f9fdfddf658 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1977,8 +1977,13 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (const Instruction *Inst = dyn_cast(V)) { Register InReg = FuncInfo.InitializeRegForValue(Inst); + std::optional CallConv = std::nullopt; + auto *Callee = dyn_cast(Inst); + if (Callee && !Callee->isInlineAsm()) + CallConv = Callee->getCallingConv(); + RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, - Inst->getType(), std::nullopt); + Inst->getType(), CallConv); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } diff --git a/llvm/test/CodeGen/X86/bf16-fast-isel.ll b/llvm/test/CodeGen/X86/bf16-fast-isel.ll new file mode 100644 index 0000000000000..43622566d2974 --- /dev/null +++ b/llvm/test/CodeGen/X86/bf16-fast-isel.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s + +define 
i8 @test(ptr %f) nounwind { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq foo@PLT +; CHECK-NEXT: callq bar@PLT +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq +entry: + %call = call bfloat @foo(ptr %f) + %call2 = call noundef zeroext i8 @bar(bfloat %call) + ret i8 %call2 +} + +declare bfloat @foo(ptr %f) +declare zeroext i8 @bar(bfloat) From 8b591414cecace4d608f04c91e757d27fde58a71 Mon Sep 17 00:00:00 2001 From: Yuanke Luo Date: Wed, 22 Oct 2025 15:40:53 +0800 Subject: [PATCH 2/4] Address Matt's comments. --- .../SelectionDAG/SelectionDAGBuilder.cpp | 8 +++---- llvm/test/CodeGen/X86/bf16-fast-isel.ll | 22 ++++++++++++++++--- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 14f9fdfddf658..dcf2df305d24a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1977,10 +1977,10 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (const Instruction *Inst = dyn_cast(V)) { Register InReg = FuncInfo.InitializeRegForValue(Inst); - std::optional CallConv = std::nullopt; - auto *Callee = dyn_cast(Inst); - if (Callee && !Callee->isInlineAsm()) - CallConv = Callee->getCallingConv(); + std::optional CallConv; + auto *CI = dyn_cast(Inst); + if (CI && !CI->isInlineAsm()) + CallConv = CI->getCallingConv(); RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, Inst->getType(), CallConv); diff --git a/llvm/test/CodeGen/X86/bf16-fast-isel.ll b/llvm/test/CodeGen/X86/bf16-fast-isel.ll index 43622566d2974..a22c1911cb6a8 100644 --- a/llvm/test/CodeGen/X86/bf16-fast-isel.ll +++ b/llvm/test/CodeGen/X86/bf16-fast-isel.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s -define i8 
@test(ptr %f) nounwind { -; CHECK-LABEL: test: +define i8 @test_direct_call(ptr %f) nounwind { +; CHECK-LABEL: test_direct_call: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq foo@PLT @@ -11,7 +11,23 @@ define i8 @test(ptr %f) nounwind { ; CHECK-NEXT: retq entry: %call = call bfloat @foo(ptr %f) - %call2 = call noundef zeroext i8 @bar(bfloat %call) + %call2 = call zeroext i8 @bar(bfloat %call) + ret i8 %call2 +} + +define i8 @test_indirect_all(ptr %fptr, ptr %f) nounwind { +; CHECK-LABEL: test_indirect_all: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movq %rsi, %rdi +; CHECK-NEXT: callq foo@PLT +; CHECK-NEXT: callq *%rbx +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq +entry: + %call = call bfloat @foo(ptr %f) + %call2 = call zeroext i8 %fptr(bfloat %call) ret i8 %call2 } From 5620e4fb8aebbc1fe389fb61ebc3eb417b071fca Mon Sep 17 00:00:00 2001 From: Yuanke Luo Date: Wed, 22 Oct 2025 15:50:10 +0800 Subject: [PATCH 3/4] Add test case for fast calling convention --- llvm/test/CodeGen/X86/bf16-fast-isel.ll | 31 +++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/llvm/test/CodeGen/X86/bf16-fast-isel.ll b/llvm/test/CodeGen/X86/bf16-fast-isel.ll index a22c1911cb6a8..9fdea0b899285 100644 --- a/llvm/test/CodeGen/X86/bf16-fast-isel.ll +++ b/llvm/test/CodeGen/X86/bf16-fast-isel.ll @@ -15,6 +15,20 @@ entry: ret i8 %call2 } +define i8 @test_fast_direct_call(ptr %f) nounwind { +; CHECK-LABEL: test_fast_direct_call: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq foo_fast@PLT +; CHECK-NEXT: callq bar@PLT +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq +entry: + %call = call fastcc bfloat @foo_fast(ptr %f) + %call2 = call zeroext i8 @bar(bfloat %call) + ret i8 %call2 +} + define i8 @test_indirect_all(ptr %fptr, ptr %f) nounwind { ; CHECK-LABEL: test_indirect_all: ; CHECK: # %bb.0: # %entry @@ -31,5 +45,22 @@ entry: ret i8 %call2 } +define i8 
@test_fast_indirect_all(ptr %fptr, ptr %f) nounwind { +; CHECK-LABEL: test_fast_indirect_all: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movq %rsi, %rdi +; CHECK-NEXT: callq foo@PLT +; CHECK-NEXT: callq *%rbx +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq +entry: + %call = call fastcc bfloat @foo(ptr %f) + %call2 = call zeroext i8 %fptr(bfloat %call) + ret i8 %call2 +} + declare bfloat @foo(ptr %f) declare zeroext i8 @bar(bfloat) +declare fastcc bfloat @foo_fast(ptr %f) From ad77fbefe87d85e5ea1804a5c1c13664b015602e Mon Sep 17 00:00:00 2001 From: Yuanke Luo Date: Wed, 22 Oct 2025 16:16:41 +0800 Subject: [PATCH 4/4] Address Phoebe's comments --- llvm/test/CodeGen/X86/bf16-fast-isel.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/X86/bf16-fast-isel.ll b/llvm/test/CodeGen/X86/bf16-fast-isel.ll index 9fdea0b899285..c659e0e647d36 100644 --- a/llvm/test/CodeGen/X86/bf16-fast-isel.ll +++ b/llvm/test/CodeGen/X86/bf16-fast-isel.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s +; RUN: llc --fast-isel < %s -mtriple=x86_64-unknown-unknown | FileCheck %s define i8 @test_direct_call(ptr %f) nounwind { ; CHECK-LABEL: test_direct_call: