From 08275b72c6740e37123914f2435f9c256c3343a7 Mon Sep 17 00:00:00 2001 From: Malay Sanghi Date: Fri, 28 Jun 2024 00:09:20 -0700 Subject: [PATCH 1/3] Add Instruction selection support for x87 ld/st Other ld/st also have c++ selection. --- .../X86/GISel/X86InstructionSelector.cpp | 28 ++- .../CodeGen/X86/GlobalISel/x86_64-fallback.ll | 9 - llvm/test/CodeGen/X86/GlobalISel/x87.ll | 221 ++++++++++++++++++ 3 files changed, 244 insertions(+), 14 deletions(-) create mode 100644 llvm/test/CodeGen/X86/GlobalISel/x87.ll diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp index 303783ea3fd22..64f977e9c9d66 100644 --- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp @@ -195,6 +195,14 @@ X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const { return &X86::VR512RegClass; } + if (RB.getID() == X86::PSRRegBankID) { + if (Ty.getSizeInBits() == 80) + return &X86::RFP80RegClass; + if (Ty.getSizeInBits() == 64) + return &X86::RFP64RegClass; + return &X86::RFP32RegClass; + } + llvm_unreachable("Unknown RegBank!"); } @@ -462,6 +470,8 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty, : (HasAVX512 ? X86::VMOVSSZmr : HasAVX ? X86::VMOVSSmr : X86::MOVSSmr); + if (X86::PSRRegBankID == RB.getID()) + return Isload ? X86::LD_Fp32m : X86::ST_Fp32m; } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) { if (X86::GPRRegBankID == RB.getID()) return Isload ? X86::MOV64rm : X86::MOV64mr; @@ -472,6 +482,10 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty, : (HasAVX512 ? X86::VMOVSDZmr : HasAVX ? X86::VMOVSDmr : X86::MOVSDmr); + if (X86::PSRRegBankID == RB.getID()) + return Isload ? X86::LD_Fp64m : X86::ST_Fp64m; + } else if (Ty == LLT::scalar(80) || Ty == LLT::pointer(0, 80)) { + return Isload ? 
X86::LD_Fp80m : X86::ST_FpP80m; } else if (Ty.isVector() && Ty.getSizeInBits() == 128) { if (Alignment >= Align(16)) return Isload ? (HasVLX ? X86::VMOVAPSZ128rm @@ -611,7 +625,10 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I, I.removeOperand(0); addFullAddress(MIB, AM).addUse(DefReg); } - return constrainSelectedInstRegOperands(I, TII, TRI, RBI); + bool Constrained = constrainSelectedInstRegOperands(I, TII, TRI, RBI); + if (Constrained) + I.addImplicitDefUseOperands(MF); + return Constrained; } static unsigned getLeaOP(LLT Ty, const X86Subtarget &STI) { @@ -1503,14 +1520,15 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I, const Register DstReg = I.getOperand(0).getReg(); const LLT DstTy = MRI.getType(DstReg); const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI); - Align Alignment = Align(DstTy.getSizeInBytes()); + // Create the load from the constant pool. + const ConstantFP *CFP = I.getOperand(1).getFPImm(); + const auto &DataLayout = MF.getDataLayout(); + Align Alignment = DataLayout.getPrefTypeAlign(CFP->getType()); const DebugLoc &DbgLoc = I.getDebugLoc(); unsigned Opc = getLoadStoreOp(DstTy, RegBank, TargetOpcode::G_LOAD, Alignment); - // Create the load from the constant pool. 
- const ConstantFP *CFP = I.getOperand(1).getFPImm(); unsigned CPI = MF.getConstantPool()->getConstantPoolIndex(CFP, Alignment); MachineInstr *LoadInst = nullptr; unsigned char OpFlag = STI.classifyLocalReference(nullptr); @@ -1525,7 +1543,7 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I, MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad, - LLT::pointer(0, MF.getDataLayout().getPointerSizeInBits()), Alignment); + LLT::pointer(0, DataLayout.getPointerSizeInBits()), Alignment); LoadInst = addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg), diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll b/llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll index 39302734dde78..bb0f0ae14f304 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll @@ -7,15 +7,6 @@ ; When we cannot produce a test case anymore, that means we can remove ; the fallback path. -; Check that we fallback on invoke translation failures. 
-; FALLBACK-WITH-REPORT-ERR: remark: :0:0: cannot select: G_STORE %1:psr(s80), %0:gpr(p0) :: (store (s80) into %ir.ptr, align 16) (in function: test_x86_fp80_dump) -; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for test_x86_fp80_dump -; FALLBACK-WITH-REPORT-OUT-LABEL: test_x86_fp80_dump: -define void @test_x86_fp80_dump(ptr %ptr){ - store x86_fp80 0xK4002A000000000000000, ptr %ptr, align 16 - ret void -} - ; Check that we fallback on byVal argument ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to translate instruction: call: ' call void @ScaleObjectOverwrite_3(ptr %index, ptr byval(%struct.PointListStruct) %index)' (in function: ScaleObjectOverwrite_2) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for ScaleObjectOverwrite_2 diff --git a/llvm/test/CodeGen/X86/GlobalISel/x87.ll b/llvm/test/CodeGen/X86/GlobalISel/x87.ll new file mode 100644 index 0000000000000..ebec84b03ba20 --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/x87.ll @@ -0,0 +1,221 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-32,GISEL_X86 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-64,GISEL_X64 +; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 | FileCheck %s --check-prefixes=CHECK-32,SDAG_X86 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 | FileCheck %s --check-prefixes=CHECK-64,SDAG_X64 + +define x86_fp80 @f0(x86_fp80 noundef %a) nounwind { +; GISEL_X86-LABEL: f0: +; GISEL_X86: # %bb.0: +; GISEL_X86-NEXT: pushl %ebp +; GISEL_X86-NEXT: movl %esp, %ebp +; GISEL_X86-NEXT: andl $-16, %esp +; GISEL_X86-NEXT: subl $48, %esp +; GISEL_X86-NEXT: fldt 8(%ebp) +; GISEL_X86-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}} +; GISEL_X86-NEXT: fxch %st(1) +; GISEL_X86-NEXT: fstpt {{[0-9]+}}(%esp) +; GISEL_X86-NEXT: 
fstpt (%esp) +; GISEL_X86-NEXT: fldt {{[0-9]+}}(%esp) +; GISEL_X86-NEXT: fldt (%esp) +; GISEL_X86-NEXT: faddp %st, %st(1) +; GISEL_X86-NEXT: movl %ebp, %esp +; GISEL_X86-NEXT: popl %ebp +; GISEL_X86-NEXT: retl +; +; GISEL_X64-LABEL: f0: +; GISEL_X64: # %bb.0: +; GISEL_X64-NEXT: fldt {{[0-9]+}}(%rsp) +; GISEL_X64-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; GISEL_X64-NEXT: fxch %st(1) +; GISEL_X64-NEXT: fstpt -{{[0-9]+}}(%rsp) +; GISEL_X64-NEXT: fstpt -{{[0-9]+}}(%rsp) +; GISEL_X64-NEXT: fldt -{{[0-9]+}}(%rsp) +; GISEL_X64-NEXT: fldt -{{[0-9]+}}(%rsp) +; GISEL_X64-NEXT: faddp %st, %st(1) +; GISEL_X64-NEXT: retq +; +; SDAG_X86-LABEL: f0: +; SDAG_X86: # %bb.0: +; SDAG_X86-NEXT: pushl %ebp +; SDAG_X86-NEXT: movl %esp, %ebp +; SDAG_X86-NEXT: andl $-16, %esp +; SDAG_X86-NEXT: subl $48, %esp +; SDAG_X86-NEXT: fldt 8(%ebp) +; SDAG_X86-NEXT: fld %st(0) +; SDAG_X86-NEXT: fstpt {{[0-9]+}}(%esp) +; SDAG_X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; SDAG_X86-NEXT: fld %st(0) +; SDAG_X86-NEXT: fstpt (%esp) +; SDAG_X86-NEXT: faddp %st, %st(1) +; SDAG_X86-NEXT: movl %ebp, %esp +; SDAG_X86-NEXT: popl %ebp +; SDAG_X86-NEXT: retl +; +; SDAG_X64-LABEL: f0: +; SDAG_X64: # %bb.0: +; SDAG_X64-NEXT: fldt {{[0-9]+}}(%rsp) +; SDAG_X64-NEXT: fld %st(0) +; SDAG_X64-NEXT: fstpt -{{[0-9]+}}(%rsp) +; SDAG_X64-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; SDAG_X64-NEXT: fld %st(0) +; SDAG_X64-NEXT: fstpt -{{[0-9]+}}(%rsp) +; SDAG_X64-NEXT: faddp %st, %st(1) +; SDAG_X64-NEXT: retq + %a.addr = alloca x86_fp80, align 16 + %x = alloca x86_fp80, align 16 + store x86_fp80 %a, ptr %a.addr, align 16 + store x86_fp80 0xK400A8000000000000000, ptr %x, align 16 + %load1 = load x86_fp80, ptr %a.addr, align 16 + %load2 = load x86_fp80, ptr %x, align 16 + %add = fadd x86_fp80 %load1, %load2 + ret x86_fp80 %add +} + + +define void @f1(ptr %a, ptr %b) nounwind { +; GISEL_X86-LABEL: f1: +; GISEL_X86: # %bb.0: +; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; 
GISEL_X86-NEXT: fldt (%eax) +; GISEL_X86-NEXT: fldt (%ecx) +; GISEL_X86-NEXT: fsubrp %st, %st(1) +; GISEL_X86-NEXT: fstpt (%eax) +; GISEL_X86-NEXT: retl +; +; CHECK-64-LABEL: f1: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: fldt (%rdi) +; CHECK-64-NEXT: fldt (%rsi) +; CHECK-64-NEXT: fsubrp %st, %st(1) +; CHECK-64-NEXT: fstpt (%rdi) +; CHECK-64-NEXT: retq +; +; SDAG_X86-LABEL: f1: +; SDAG_X86: # %bb.0: +; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; SDAG_X86-NEXT: fldt (%ecx) +; SDAG_X86-NEXT: fldt (%eax) +; SDAG_X86-NEXT: fsubrp %st, %st(1) +; SDAG_X86-NEXT: fstpt (%ecx) +; SDAG_X86-NEXT: retl + %load1 = load x86_fp80, ptr %a, align 4 + %load2 = load x86_fp80, ptr %b, align 4 + %sub = fsub x86_fp80 %load1, %load2 + store x86_fp80 %sub, ptr %a, align 4 + ret void +} + +define void @f2(ptr %a, ptr %b) nounwind { +; GISEL_X86-LABEL: f2: +; GISEL_X86: # %bb.0: +; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL_X86-NEXT: fldt (%eax) +; GISEL_X86-NEXT: fldt (%ecx) +; GISEL_X86-NEXT: fmulp %st, %st(1) +; GISEL_X86-NEXT: fstpt (%eax) +; GISEL_X86-NEXT: retl +; +; CHECK-64-LABEL: f2: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: fldt (%rdi) +; CHECK-64-NEXT: fldt (%rsi) +; CHECK-64-NEXT: fmulp %st, %st(1) +; CHECK-64-NEXT: fstpt (%rdi) +; CHECK-64-NEXT: retq +; +; SDAG_X86-LABEL: f2: +; SDAG_X86: # %bb.0: +; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; SDAG_X86-NEXT: fldt (%ecx) +; SDAG_X86-NEXT: fldt (%eax) +; SDAG_X86-NEXT: fmulp %st, %st(1) +; SDAG_X86-NEXT: fstpt (%ecx) +; SDAG_X86-NEXT: retl + %load1 = load x86_fp80, ptr %a, align 16 + %load2 = load x86_fp80, ptr %b, align 16 + %mul = fmul x86_fp80 %load1, %load2 + store x86_fp80 %mul, ptr %a, align 16 + ret void +} + +define void @f3(ptr %a, ptr %b) nounwind { +; GISEL_X86-LABEL: f3: +; GISEL_X86: # %bb.0: +; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL_X86-NEXT: movl 
{{[0-9]+}}(%esp), %ecx +; GISEL_X86-NEXT: fldt (%eax) +; GISEL_X86-NEXT: fldt (%ecx) +; GISEL_X86-NEXT: fdivrp %st, %st(1) +; GISEL_X86-NEXT: fstpt (%eax) +; GISEL_X86-NEXT: retl +; +; CHECK-64-LABEL: f3: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: fldt (%rdi) +; CHECK-64-NEXT: fldt (%rsi) +; CHECK-64-NEXT: fdivrp %st, %st(1) +; CHECK-64-NEXT: fstpt (%rdi) +; CHECK-64-NEXT: retq +; +; SDAG_X86-LABEL: f3: +; SDAG_X86: # %bb.0: +; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; SDAG_X86-NEXT: fldt (%ecx) +; SDAG_X86-NEXT: fldt (%eax) +; SDAG_X86-NEXT: fdivrp %st, %st(1) +; SDAG_X86-NEXT: fstpt (%ecx) +; SDAG_X86-NEXT: retl + %load1 = load x86_fp80, ptr %a, align 4 + %load2 = load x86_fp80, ptr %b, align 4 + %div = fdiv x86_fp80 %load1, %load2 + store x86_fp80 %div, ptr %a, align 4 + ret void +} + +define void @f6(ptr %0, ptr %1) nounwind { +; GISEL_X86-LABEL: f6: +; GISEL_X86: # %bb.0: +; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL_X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; GISEL_X86-NEXT: flds (%eax) +; GISEL_X86-NEXT: faddp %st, %st(1) +; GISEL_X86-NEXT: fstps (%ecx) +; GISEL_X86-NEXT: retl +; +; GISEL_X64-LABEL: f6: +; GISEL_X64: # %bb.0: +; GISEL_X64-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; GISEL_X64-NEXT: flds (%rdi) +; GISEL_X64-NEXT: faddp %st, %st(1) +; GISEL_X64-NEXT: fstps (%rsi) +; GISEL_X64-NEXT: retq +; +; SDAG_X86-LABEL: f6: +; SDAG_X86: # %bb.0: +; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; SDAG_X86-NEXT: flds (%ecx) +; SDAG_X86-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}} +; SDAG_X86-NEXT: fstps (%eax) +; SDAG_X86-NEXT: retl +; +; SDAG_X64-LABEL: f6: +; SDAG_X64: # %bb.0: +; SDAG_X64-NEXT: flds (%rdi) +; SDAG_X64-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; SDAG_X64-NEXT: fstps (%rsi) +; SDAG_X64-NEXT: retq + %load1 = load float, ptr %0 + %add = fadd float %load1, 20.0 + store float %add, ptr %1 + ret void +} 
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-32: {{.*}} From 90889419e17e54670531f84a01b9026d9aae20fc Mon Sep 17 00:00:00 2001 From: Malay Sanghi Date: Wed, 3 Jul 2024 04:10:35 -0700 Subject: [PATCH 2/3] Address review: tighten x87 type checks; move test to isel-x87.ll and add fast-isel runs --- .../X86/GISel/X86InstructionSelector.cpp | 5 +- .../X86/{GlobalISel/x87.ll => isel-x87.ll} | 94 ++++++++++--------- 2 files changed, 52 insertions(+), 47 deletions(-) rename llvm/test/CodeGen/X86/{GlobalISel/x87.ll => isel-x87.ll} (96%) diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp index 64f977e9c9d66..9b869b1d0fb7c 100644 --- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp @@ -200,7 +200,8 @@ X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const { return &X86::RFP80RegClass; if (Ty.getSizeInBits() == 64) return &X86::RFP64RegClass; - return &X86::RFP32RegClass; + if (Ty.getSizeInBits() == 32) + return &X86::RFP32RegClass; } llvm_unreachable("Unknown RegBank!"); @@ -484,7 +485,7 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty, X86::MOVSDmr); if (X86::PSRRegBankID == RB.getID()) return Isload ? X86::LD_Fp64m : X86::ST_Fp64m; - } else if (Ty == LLT::scalar(80) || Ty == LLT::pointer(0, 80)) { + } else if (Ty == LLT::scalar(80)) { return Isload ? 
X86::LD_Fp80m : X86::ST_FpP80m; } else if (Ty.isVector() && Ty.getSizeInBits() == 128) { if (Alignment >= Align(16)) diff --git a/llvm/test/CodeGen/X86/GlobalISel/x87.ll b/llvm/test/CodeGen/X86/isel-x87.ll similarity index 96% rename from llvm/test/CodeGen/X86/GlobalISel/x87.ll rename to llvm/test/CodeGen/X86/isel-x87.ll index ebec84b03ba20..f1adf279c7353 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/x87.ll +++ b/llvm/test/CodeGen/X86/isel-x87.ll @@ -1,8 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-32,GISEL_X86 -; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-64,GISEL_X64 ; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 | FileCheck %s --check-prefixes=CHECK-32,SDAG_X86 +; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 -fast-isel=true | FileCheck %s --check-prefixes=CHECK-32,SDAG_X86,FAST_X86 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-64,GISEL_X64 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 | FileCheck %s --check-prefixes=CHECK-64,SDAG_X64 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 -fast-isel=true | FileCheck %s --check-prefixes=CHECK-64,SDAG_X64,FAST_X64 define x86_fp80 @f0(x86_fp80 noundef %a) nounwind { ; GISEL_X86-LABEL: f0: @@ -23,18 +25,6 @@ define x86_fp80 @f0(x86_fp80 noundef %a) nounwind { ; GISEL_X86-NEXT: popl %ebp ; GISEL_X86-NEXT: retl ; -; GISEL_X64-LABEL: f0: -; GISEL_X64: # %bb.0: -; GISEL_X64-NEXT: fldt {{[0-9]+}}(%rsp) -; GISEL_X64-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; GISEL_X64-NEXT: fxch %st(1) -; GISEL_X64-NEXT: fstpt -{{[0-9]+}}(%rsp) -; GISEL_X64-NEXT: fstpt -{{[0-9]+}}(%rsp) -; GISEL_X64-NEXT: fldt -{{[0-9]+}}(%rsp) -; GISEL_X64-NEXT: fldt -{{[0-9]+}}(%rsp) -; GISEL_X64-NEXT: faddp %st, %st(1) -; 
GISEL_X64-NEXT: retq -; ; SDAG_X86-LABEL: f0: ; SDAG_X86: # %bb.0: ; SDAG_X86-NEXT: pushl %ebp @@ -52,6 +42,18 @@ define x86_fp80 @f0(x86_fp80 noundef %a) nounwind { ; SDAG_X86-NEXT: popl %ebp ; SDAG_X86-NEXT: retl ; +; GISEL_X64-LABEL: f0: +; GISEL_X64: # %bb.0: +; GISEL_X64-NEXT: fldt {{[0-9]+}}(%rsp) +; GISEL_X64-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; GISEL_X64-NEXT: fxch %st(1) +; GISEL_X64-NEXT: fstpt -{{[0-9]+}}(%rsp) +; GISEL_X64-NEXT: fstpt -{{[0-9]+}}(%rsp) +; GISEL_X64-NEXT: fldt -{{[0-9]+}}(%rsp) +; GISEL_X64-NEXT: fldt -{{[0-9]+}}(%rsp) +; GISEL_X64-NEXT: faddp %st, %st(1) +; GISEL_X64-NEXT: retq +; ; SDAG_X64-LABEL: f0: ; SDAG_X64: # %bb.0: ; SDAG_X64-NEXT: fldt {{[0-9]+}}(%rsp) @@ -84,14 +86,6 @@ define void @f1(ptr %a, ptr %b) nounwind { ; GISEL_X86-NEXT: fstpt (%eax) ; GISEL_X86-NEXT: retl ; -; CHECK-64-LABEL: f1: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: fldt (%rdi) -; CHECK-64-NEXT: fldt (%rsi) -; CHECK-64-NEXT: fsubrp %st, %st(1) -; CHECK-64-NEXT: fstpt (%rdi) -; CHECK-64-NEXT: retq -; ; SDAG_X86-LABEL: f1: ; SDAG_X86: # %bb.0: ; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -101,6 +95,14 @@ define void @f1(ptr %a, ptr %b) nounwind { ; SDAG_X86-NEXT: fsubrp %st, %st(1) ; SDAG_X86-NEXT: fstpt (%ecx) ; SDAG_X86-NEXT: retl +; +; CHECK-64-LABEL: f1: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: fldt (%rdi) +; CHECK-64-NEXT: fldt (%rsi) +; CHECK-64-NEXT: fsubrp %st, %st(1) +; CHECK-64-NEXT: fstpt (%rdi) +; CHECK-64-NEXT: retq %load1 = load x86_fp80, ptr %a, align 4 %load2 = load x86_fp80, ptr %b, align 4 %sub = fsub x86_fp80 %load1, %load2 @@ -119,14 +121,6 @@ define void @f2(ptr %a, ptr %b) nounwind { ; GISEL_X86-NEXT: fstpt (%eax) ; GISEL_X86-NEXT: retl ; -; CHECK-64-LABEL: f2: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: fldt (%rdi) -; CHECK-64-NEXT: fldt (%rsi) -; CHECK-64-NEXT: fmulp %st, %st(1) -; CHECK-64-NEXT: fstpt (%rdi) -; CHECK-64-NEXT: retq -; ; SDAG_X86-LABEL: f2: ; SDAG_X86: # %bb.0: ; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -136,6 
+130,14 @@ define void @f2(ptr %a, ptr %b) nounwind { ; SDAG_X86-NEXT: fmulp %st, %st(1) ; SDAG_X86-NEXT: fstpt (%ecx) ; SDAG_X86-NEXT: retl +; +; CHECK-64-LABEL: f2: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: fldt (%rdi) +; CHECK-64-NEXT: fldt (%rsi) +; CHECK-64-NEXT: fmulp %st, %st(1) +; CHECK-64-NEXT: fstpt (%rdi) +; CHECK-64-NEXT: retq %load1 = load x86_fp80, ptr %a, align 16 %load2 = load x86_fp80, ptr %b, align 16 %mul = fmul x86_fp80 %load1, %load2 @@ -154,14 +156,6 @@ define void @f3(ptr %a, ptr %b) nounwind { ; GISEL_X86-NEXT: fstpt (%eax) ; GISEL_X86-NEXT: retl ; -; CHECK-64-LABEL: f3: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: fldt (%rdi) -; CHECK-64-NEXT: fldt (%rsi) -; CHECK-64-NEXT: fdivrp %st, %st(1) -; CHECK-64-NEXT: fstpt (%rdi) -; CHECK-64-NEXT: retq -; ; SDAG_X86-LABEL: f3: ; SDAG_X86: # %bb.0: ; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -171,6 +165,14 @@ define void @f3(ptr %a, ptr %b) nounwind { ; SDAG_X86-NEXT: fdivrp %st, %st(1) ; SDAG_X86-NEXT: fstpt (%ecx) ; SDAG_X86-NEXT: retl +; +; CHECK-64-LABEL: f3: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: fldt (%rdi) +; CHECK-64-NEXT: fldt (%rsi) +; CHECK-64-NEXT: fdivrp %st, %st(1) +; CHECK-64-NEXT: fstpt (%rdi) +; CHECK-64-NEXT: retq %load1 = load x86_fp80, ptr %a, align 4 %load2 = load x86_fp80, ptr %b, align 4 %div = fdiv x86_fp80 %load1, %load2 @@ -189,14 +191,6 @@ define void @f6(ptr %0, ptr %1) nounwind { ; GISEL_X86-NEXT: fstps (%ecx) ; GISEL_X86-NEXT: retl ; -; GISEL_X64-LABEL: f6: -; GISEL_X64: # %bb.0: -; GISEL_X64-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; GISEL_X64-NEXT: flds (%rdi) -; GISEL_X64-NEXT: faddp %st, %st(1) -; GISEL_X64-NEXT: fstps (%rsi) -; GISEL_X64-NEXT: retq -; ; SDAG_X86-LABEL: f6: ; SDAG_X86: # %bb.0: ; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -206,6 +200,14 @@ define void @f6(ptr %0, ptr %1) nounwind { ; SDAG_X86-NEXT: fstps (%eax) ; SDAG_X86-NEXT: retl ; +; GISEL_X64-LABEL: f6: +; GISEL_X64: # %bb.0: +; GISEL_X64-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; 
GISEL_X64-NEXT: flds (%rdi) +; GISEL_X64-NEXT: faddp %st, %st(1) +; GISEL_X64-NEXT: fstps (%rsi) +; GISEL_X64-NEXT: retq +; ; SDAG_X64-LABEL: f6: ; SDAG_X64: # %bb.0: ; SDAG_X64-NEXT: flds (%rdi) @@ -219,3 +221,5 @@ define void @f6(ptr %0, ptr %1) nounwind { } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; CHECK-32: {{.*}} +; FAST_X64: {{.*}} +; FAST_X86: {{.*}} From c57992c3f588f27871adcaeb96b262954300fad0 Mon Sep 17 00:00:00 2001 From: Malay Sanghi Date: Wed, 3 Jul 2024 05:41:09 -0700 Subject: [PATCH 3/3] Rename variables; add implicit def/use operands unconditionally --- llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp | 9 ++++----- llvm/test/CodeGen/X86/isel-x87.ll | 6 +++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp index 9b869b1d0fb7c..d73873812eeb6 100644 --- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp @@ -627,8 +627,7 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I, addFullAddress(MIB, AM).addUse(DefReg); } bool Constrained = constrainSelectedInstRegOperands(I, TII, TRI, RBI); - if (Constrained) - I.addImplicitDefUseOperands(MF); + I.addImplicitDefUseOperands(MF); return Constrained; } @@ -1523,8 +1522,8 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I, const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI); // Create the load from the constant pool. 
const ConstantFP *CFP = I.getOperand(1).getFPImm(); - const auto &DataLayout = MF.getDataLayout(); - Align Alignment = DataLayout.getPrefTypeAlign(CFP->getType()); + const auto &DL = MF.getDataLayout(); + Align Alignment = DL.getPrefTypeAlign(CFP->getType()); const DebugLoc &DbgLoc = I.getDebugLoc(); unsigned Opc = @@ -1544,7 +1543,7 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I, MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad, - LLT::pointer(0, DataLayout.getPointerSizeInBits()), Alignment); + LLT::pointer(0, DL.getPointerSizeInBits()), Alignment); LoadInst = addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg), diff --git a/llvm/test/CodeGen/X86/isel-x87.ll b/llvm/test/CodeGen/X86/isel-x87.ll index f1adf279c7353..690c1f6ea968c 100644 --- a/llvm/test/CodeGen/X86/isel-x87.ll +++ b/llvm/test/CodeGen/X86/isel-x87.ll @@ -180,7 +180,7 @@ define void @f3(ptr %a, ptr %b) nounwind { ret void } -define void @f6(ptr %0, ptr %1) nounwind { +define void @f6(ptr %a, ptr %b) nounwind { ; GISEL_X86-LABEL: f6: ; GISEL_X86: # %bb.0: ; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -214,9 +214,9 @@ define void @f6(ptr %0, ptr %1) nounwind { ; SDAG_X64-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) ; SDAG_X64-NEXT: fstps (%rsi) ; SDAG_X64-NEXT: retq - %load1 = load float, ptr %0 + %load1 = load float, ptr %a %add = fadd float %load1, 20.0 - store float %add, ptr %1 + store float %add, ptr %b ret void } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: