diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index e6db840c08020..3ccc73398064b 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -1237,6 +1237,43 @@ namespace X86II { return false; } + inline bool canUseApxExtendedReg(const MCInstrDesc &Desc) { + uint64_t TSFlags = Desc.TSFlags; + uint64_t Encoding = TSFlags & EncodingMask; + // EVEX can always use egpr. + if (Encoding == X86II::EVEX) + return true; + + // To be conservative, egpr is not used for all pseudo instructions + // because we are not sure what instruction it will become. + // FIXME: Could we improve it in X86ExpandPseudo? + if (isPseudo(TSFlags)) + return false; + + // MAP OB/TB in legacy encoding space can always use egpr except + // XSAVE*/XRSTOR*. + unsigned Opcode = Desc.Opcode; + switch (Opcode) { + default: + break; + case X86::XSAVE: + case X86::XSAVE64: + case X86::XSAVEOPT: + case X86::XSAVEOPT64: + case X86::XSAVEC: + case X86::XSAVEC64: + case X86::XSAVES: + case X86::XSAVES64: + case X86::XRSTOR: + case X86::XRSTOR64: + case X86::XRSTORS: + case X86::XRSTORS64: + return false; + } + uint64_t OpMap = TSFlags & X86II::OpMapMask; + return !Encoding && (OpMap == X86II::OB || OpMap == X86II::TB); + } + /// \returns true if the MemoryOperand is a 32 extended (zmm16 or higher) /// registers, e.g. zmm21, etc. static inline bool is32ExtendedReg(unsigned RegNo) { diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index e2935a687f98b..ade175d99c89a 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -341,6 +341,8 @@ def FeatureAVX10_1 : SubtargetFeature<"avx10.1-256", "HasAVX10_1", "true", def FeatureAVX10_1_512 : SubtargetFeature<"avx10.1-512", "HasAVX10_1_512", "true", "Support AVX10.1 up to 512-bit instruction", [FeatureAVX10_1, FeatureEVEX512]>; +def FeatureEGPR : SubtargetFeature<"egpr", "HasEGPR", "true", + "Support extended general purpose register">; // Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka // "string operations"). See "REP String Enhancement" in the Intel Software diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 4c6854da0ada3..56e3ac79b5957 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -92,6 +92,37 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) Subtarget(STI), RI(STI.getTargetTriple()) { } +const TargetRegisterClass * +X86InstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum, + const TargetRegisterInfo *TRI, + const MachineFunction &MF) const { + auto *RC = TargetInstrInfo::getRegClass(MCID, OpNum, TRI, MF); + // If the target does not have egpr, then r16-r31 will be reserved for all + // instructions.
+ if (!RC || !Subtarget.hasEGPR()) + return RC; + + if (X86II::canUseApxExtendedReg(MCID)) + return RC; + + switch (RC->getID()) { + default: + return RC; + case X86::GR8RegClassID: + return &X86::GR8_NOREX2RegClass; + case X86::GR16RegClassID: + return &X86::GR16_NOREX2RegClass; + case X86::GR32RegClassID: + return &X86::GR32_NOREX2RegClass; + case X86::GR64RegClassID: + return &X86::GR64_NOREX2RegClass; + case X86::GR32_NOSPRegClassID: + return &X86::GR32_NOREX2_NOSPRegClass; + case X86::GR64_NOSPRegClassID: + return &X86::GR64_NOREX2_NOSPRegClass; + } +} + bool X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index e1199e20c318e..b0a2d2b890743 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -150,6 +150,17 @@ class X86InstrInfo final : public X86GenInstrInfo { public: explicit X86InstrInfo(X86Subtarget &STI); + /// Given a machine instruction descriptor, returns the register + /// class constraint for OpNum, or NULL. Returned register class + /// may be different from the definition in the TD file, e.g. + /// GR*RegClass (definition in TD file) + /// -> + /// GR*_NOREX2RegClass (Returned register class) + const TargetRegisterClass * + getRegClass(const MCInstrDesc &MCID, unsigned OpNum, + const TargetRegisterInfo *TRI, + const MachineFunction &MF) const override; + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index b0bea42cafe11..8b121d74023d2 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -158,6 +158,10 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, case X86::GR16RegClassID: case X86::GR32RegClassID: case X86::GR64RegClassID: + case X86::GR8_NOREX2RegClassID: + case X86::GR16_NOREX2RegClassID: + case X86::GR32_NOREX2RegClassID: + case X86::GR64_NOREX2RegClassID: case X86::RFP32RegClassID: case X86::RFP64RegClassID: case X86::RFP80RegClassID: @@ -611,6 +615,10 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { } } + // Reserve the extended general purpose registers. + if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasEGPR()) + Reserved.set(X86::R16, X86::R31WH + 1); + assert(checkAllSuperRegsMarked(Reserved, {X86::SIL, X86::DIL, X86::BPL, X86::SPL, X86::SIH, X86::DIH, X86::BPH, X86::SPH})); @@ -629,13 +637,14 @@ unsigned X86RegisterInfo::getNumSupportedRegs(const MachineFunction &MF) const { // APX registers (R16-R31) // // and try to return the minimum number of registers supported by the target.
- assert((X86::R15WH + 1 == X86 ::YMM0) && (X86::YMM15 + 1 == X86::K0) && - (X86::K6_K7 + 1 == X86::TMMCFG) && - (X86::TMM7 + 1 == X86::NUM_TARGET_REGS) && + assert((X86::R15WH + 1 == X86::YMM0) && (X86::YMM15 + 1 == X86::K0) && + (X86::K6_K7 + 1 == X86::TMMCFG) && (X86::TMM7 + 1 == X86::R16) && + (X86::R31WH + 1 == X86::NUM_TARGET_REGS) && "Register number may be incorrect"); const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); + if (ST.hasEGPR()) + return X86::NUM_TARGET_REGS; if (ST.hasAMXTILE()) return X86::TMM7 + 1; if (ST.hasAVX512()) diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td index 898a3f97e5236..166024bf3b53f 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.td +++ b/llvm/lib/Target/X86/X86RegisterInfo.td @@ -73,6 +73,44 @@ def R12B : X86Reg<"r12b", 12>; def R13B : X86Reg<"r13b", 13>; def R14B : X86Reg<"r14b", 14>; def R15B : X86Reg<"r15b", 15>; +// RAGreedy prefers to select a cheaper register +// For x86, +// Cost(caller-save reg) < Cost(callee-save reg) +// b/c callee-save register needs push/pop in prolog/epilog. +// If both registers are callee-saved or caller-saved, +// Cost(short-encoding reg) < Cost(long-encoding reg) +// +// To achieve this, we do the following things: +// 1. Set CostPerUse=1 for registers that need prefix +// 2. Consider callee-save register is never cheaper than a register w/ cost 1 +// 3. List caller-save register before callee-save register in RegisterClass +// or AllocationOrder +// +// NOTE: +// D133902 stopped assigning register costs for R8-R15, which brought gain +// and regression. We don't know if we should assign cost to R16-R31 w/o +// performance data. +// TODO: +// Update the comment/cost after tuning. +// APX only, requires REX2 or EVEX. +let PositionOrder = 4 in { +def R16B : X86Reg<"r16b", 16>; +def R17B : X86Reg<"r17b", 17>; +def R18B : X86Reg<"r18b", 18>; +def R19B : X86Reg<"r19b", 19>; +def R20B : X86Reg<"r20b", 20>; +def R21B : X86Reg<"r21b", 21>; +def R22B : X86Reg<"r22b", 22>; +def R23B : X86Reg<"r23b", 23>; +def R24B : X86Reg<"r24b", 24>; +def R25B : X86Reg<"r25b", 25>; +def R26B : X86Reg<"r26b", 26>; +def R27B : X86Reg<"r27b", 27>; +def R28B : X86Reg<"r28b", 28>; +def R29B : X86Reg<"r29b", 29>; +def R30B : X86Reg<"r30b", 30>; +def R31B : X86Reg<"r31b", 31>; +} let isArtificial = 1 in { // High byte of the low 16 bits of the super-register: @@ -88,6 +126,24 @@ def R12BH : X86Reg<"", -1>; def R13BH : X86Reg<"", -1>; def R14BH : X86Reg<"", -1>; def R15BH : X86Reg<"", -1>; +let PositionOrder = 4 in { +def R16BH : X86Reg<"", -1>; +def R17BH : X86Reg<"", -1>; +def R18BH : X86Reg<"", -1>; +def R19BH : X86Reg<"", -1>; +def R20BH : X86Reg<"", -1>; +def R21BH : X86Reg<"", -1>; +def R22BH : X86Reg<"", -1>; +def R23BH : X86Reg<"", -1>; +def R24BH : X86Reg<"", -1>; +def R25BH : X86Reg<"", -1>; +def R26BH : X86Reg<"", -1>; +def R27BH : X86Reg<"", -1>; +def R28BH : X86Reg<"", -1>; +def R29BH : X86Reg<"", -1>; +def R30BH : X86Reg<"", -1>; +def R31BH : X86Reg<"", -1>; +} // High word of the low 32 bits of the super-register: def HAX : X86Reg<"", -1>; def HDX : X86Reg<"", -1>; @@ -106,6 +162,24 @@ def R12WH : X86Reg<"", -1>; def R13WH : X86Reg<"", -1>; def R14WH : X86Reg<"", -1>; def R15WH : X86Reg<"", -1>; +let PositionOrder = 4 in { +def R16WH : X86Reg<"", -1>; +def R17WH : X86Reg<"", -1>; +def R18WH : X86Reg<"", -1>; +def R19WH : X86Reg<"", -1>; +def R20WH : X86Reg<"", -1>; +def R21WH : X86Reg<"", -1>; +def R22WH : X86Reg<"", -1>; +def R23WH : X86Reg<"", -1>; +def R24WH : X86Reg<"", -1>; +def R25WH : X86Reg<"", -1>; +def R26WH : X86Reg<"", -1>; +def R27WH : X86Reg<"", -1>; +def 
R28WH : X86Reg<"", -1>; +def R29WH : X86Reg<"", -1>; +def R30WH : X86Reg<"", -1>; +def R31WH : X86Reg<"", -1>; +} } // 16-bit registers @@ -134,6 +208,27 @@ def R13W : X86Reg<"r13w", 13, [R13B,R13BH]>; def R14W : X86Reg<"r14w", 14, [R14B,R14BH]>; def R15W : X86Reg<"r15w", 15, [R15B,R15BH]>; } +// APX only, requires REX2 or EVEX. +let SubRegIndices = [sub_8bit, sub_8bit_hi_phony], CoveredBySubRegs = 1 in { +let PositionOrder = 4 in { +def R16W : X86Reg<"r16w", 16, [R16B,R16BH]>; +def R17W : X86Reg<"r17w", 17, [R17B,R17BH]>; +def R18W : X86Reg<"r18w", 18, [R18B,R18BH]>; +def R19W : X86Reg<"r19w", 19, [R19B,R19BH]>; +def R20W : X86Reg<"r20w", 20, [R20B,R20BH]>; +def R21W : X86Reg<"r21w", 21, [R21B,R21BH]>; +def R22W : X86Reg<"r22w", 22, [R22B,R22BH]>; +def R23W : X86Reg<"r23w", 23, [R23B,R23BH]>; +def R24W : X86Reg<"r24w", 24, [R24B,R24BH]>; +def R25W : X86Reg<"r25w", 25, [R25B,R25BH]>; +def R26W : X86Reg<"r26w", 26, [R26B,R26BH]>; +def R27W : X86Reg<"r27w", 27, [R27B,R27BH]>; +def R28W : X86Reg<"r28w", 28, [R28B,R28BH]>; +def R29W : X86Reg<"r29w", 29, [R29B,R29BH]>; +def R30W : X86Reg<"r30w", 30, [R30B,R30BH]>; +def R31W : X86Reg<"r31w", 31, [R31B,R31BH]>; +} +} // 32-bit registers let SubRegIndices = [sub_16bit, sub_16bit_hi], CoveredBySubRegs = 1 in { @@ -160,6 +255,27 @@ def R14D : X86Reg<"r14d", 14, [R14W,R14WH]>; def R15D : X86Reg<"r15d", 15, [R15W,R15WH]>; } +// APX only, requires REX2 or EVEX. +let SubRegIndices = [sub_16bit, sub_16bit_hi], CoveredBySubRegs = 1 in { +let PositionOrder = 4 in { +def R16D : X86Reg<"r16d", 16, [R16W,R16WH]>; +def R17D : X86Reg<"r17d", 17, [R17W,R17WH]>; +def R18D : X86Reg<"r18d", 18, [R18W,R18WH]>; +def R19D : X86Reg<"r19d", 19, [R19W,R19WH]>; +def R20D : X86Reg<"r20d", 20, [R20W,R20WH]>; +def R21D : X86Reg<"r21d", 21, [R21W,R21WH]>; +def R22D : X86Reg<"r22d", 22, [R22W,R22WH]>; +def R23D : X86Reg<"r23d", 23, [R23W,R23WH]>; +def R24D : X86Reg<"r24d", 24, [R24W,R24WH]>; +def R25D : X86Reg<"r25d", 25, [R25W,R25WH]>; +def R26D : X86Reg<"r26d", 26, [R26W,R26WH]>; +def R27D : X86Reg<"r27d", 27, [R27W,R27WH]>; +def R28D : X86Reg<"r28d", 28, [R28W,R28WH]>; +def R29D : X86Reg<"r29d", 29, [R29W,R29WH]>; +def R30D : X86Reg<"r30d", 30, [R30W,R30WH]>; +def R31D : X86Reg<"r31d", 31, [R31W,R31WH]>; +} +} // 64-bit registers, X86-64 only let SubRegIndices = [sub_32bit] in { def RAX : X86Reg<"rax", 0, [EAX]>, DwarfRegNum<[0, -2, -2]>; @@ -181,6 +297,25 @@ def R13 : X86Reg<"r13", 13, [R13D]>, DwarfRegNum<[13, -2, -2]>; def R14 : X86Reg<"r14", 14, [R14D]>, DwarfRegNum<[14, -2, -2]>; def R15 : X86Reg<"r15", 15, [R15D]>, DwarfRegNum<[15, -2, -2]>; def RIP : X86Reg<"rip", 0, [EIP]>, DwarfRegNum<[16, -2, -2]>; +// APX only, requires REX2 or EVEX. 
+let PositionOrder = 4 in { +def R16 : X86Reg<"r16", 16, [R16D]>, DwarfRegNum<[130, -2, -2]>; +def R17 : X86Reg<"r17", 17, [R17D]>, DwarfRegNum<[131, -2, -2]>; +def R18 : X86Reg<"r18", 18, [R18D]>, DwarfRegNum<[132, -2, -2]>; +def R19 : X86Reg<"r19", 19, [R19D]>, DwarfRegNum<[133, -2, -2]>; +def R20 : X86Reg<"r20", 20, [R20D]>, DwarfRegNum<[134, -2, -2]>; +def R21 : X86Reg<"r21", 21, [R21D]>, DwarfRegNum<[135, -2, -2]>; +def R22 : X86Reg<"r22", 22, [R22D]>, DwarfRegNum<[136, -2, -2]>; +def R23 : X86Reg<"r23", 23, [R23D]>, DwarfRegNum<[137, -2, -2]>; +def R24 : X86Reg<"r24", 24, [R24D]>, DwarfRegNum<[138, -2, -2]>; +def R25 : X86Reg<"r25", 25, [R25D]>, DwarfRegNum<[139, -2, -2]>; +def R26 : X86Reg<"r26", 26, [R26D]>, DwarfRegNum<[140, -2, -2]>; +def R27 : X86Reg<"r27", 27, [R27D]>, DwarfRegNum<[141, -2, -2]>; +def R28 : X86Reg<"r28", 28, [R28D]>, DwarfRegNum<[142, -2, -2]>; +def R29 : X86Reg<"r29", 29, [R29D]>, DwarfRegNum<[143, -2, -2]>; +def R30 : X86Reg<"r30", 30, [R30D]>, DwarfRegNum<[144, -2, -2]>; +def R31 : X86Reg<"r31", 31, [R31D]>, DwarfRegNum<[145, -2, -2]>; +} } // MMX Registers. These are actually aliased to ST0 .. ST7 @@ -407,9 +542,11 @@ def SSP : X86Reg<"ssp", 0>; // instruction requiring a REX prefix, while SIL, DIL, BPL, R8D, etc. // require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d" // cannot be encoded. -def GR8 : RegisterClass<"X86", [i8], 8, +def GR8 : RegisterClass<"X86", [i8], 8, (add AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL, - R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B)> { + R8B, R9B, R10B, R11B, R16B, R17B, R18B, R19B, R20B, + R21B, R22B, R23B, R24B, R25B, R26B, R27B, R28B, R29B, + R30B, R31B, R14B, R15B, R12B, R13B)> { let AltOrders = [(sub GR8, AH, BH, CH, DH)]; let AltOrderSelect = [{ return MF.getSubtarget<X86Subtarget>().is64Bit(); @@ -417,23 +554,28 @@ def GR8 : RegisterClass<"X86", [i8], 8, } let isAllocatable = 0 in -def GRH8 : RegisterClass<"X86", [i8], 8, +def GRH8 : RegisterClass<"X86", [i8], 8, (add SIH, DIH, BPH, SPH, R8BH, R9BH, R10BH, R11BH, - R12BH, R13BH, R14BH, R15BH)>; - + R12BH, R13BH, R14BH, R15BH, R16BH, R17BH, R18BH, + R19BH, R20BH, R21BH, R22BH, R23BH, R24BH, R25BH, + R26BH, R27BH, R28BH, R29BH, R30BH, R31BH)>; def GR16 : RegisterClass<"X86", [i16], 16, - (add AX, CX, DX, SI, DI, BX, BP, SP, - R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W)>; + (add AX, CX, DX, SI, DI, BX, BP, SP, R8W, R9W, R10W, + R11W, R16W, R17W, R18W, R19W, R20W, R21W, R22W, R23W, + R24W, R25W, R26W, R27W, R28W, R29W, R30W, R31W, R14W, + R15W, R12W, R13W)>; let isAllocatable = 0 in def GRH16 : RegisterClass<"X86", [i16], 16, - (add HAX, HCX, HDX, HSI, HDI, HBX, HBP, HSP, HIP, - R8WH, R9WH, R10WH, R11WH, R12WH, R13WH, R14WH, - R15WH)>; - + (add HAX, HCX, HDX, HSI, HDI, HBX, HBP, HSP, HIP, R8WH, + R9WH, R10WH, R11WH, R12WH, R13WH, R14WH, R15WH, R16WH, + R17WH, R18WH, R19WH, R20WH, R21WH, R22WH, R23WH, R24WH, + R25WH, R26WH, R27WH, R28WH, R29WH, R30WH, R31WH)>; def GR32 : RegisterClass<"X86", [i32], 32, - (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP, - R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)>; + (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP, R8D, R9D, + R10D, R11D, R16D, R17D, R18D, R19D, R20D, R21D, R22D, + R23D, R24D, R25D, R26D, R27D, R28D, R29D, R30D, R31D, + R14D, R15D, R12D, R13D)>; // GR64 - 64-bit GPRs. 
This oddly includes RIP, which isn't accurate, since // RIP isn't really a register and it can't be used anywhere except in an @@ -441,8 +583,9 @@ def GR32 : RegisterClass<"X86", [i32], 32, // FIXME: it *does* cause trouble - CheckBaseRegAndIndexReg() has extra // tests because of the inclusion of RIP in this register class. def GR64 : RegisterClass<"X86", [i64], 64, - (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, - RBX, R14, R15, R12, R13, RBP, RSP, RIP)>; + (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, R16, R17, + R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, + R30, R31, RBX, R14, R15, R12, R13, RBP, RSP, RIP)>; // GR64PLTSafe - 64-bit GPRs without R10, R11, RSP and RIP. Could be used when // emitting code for intrinsics, which use implict input registers. @@ -508,6 +651,27 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32, // GR64_NOREX - GR64 registers which do not require a REX prefix. def GR64_NOREX : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP)>; +// GeneratePressureSet = 0 here is a temporary workaround for lots of +// LIT failures. Whether to enable it in the future still needs discussion. +let GeneratePressureSet = 0 in { +// GR8_NOREX2 - GR8 registers which do not require a REX2 prefix. +def GR8_NOREX2 : RegisterClass<"X86", [i8], 8, + (sub GR8, (sequence "R%uB", 16, 31))> { + let AltOrders = [(sub GR8_NOREX2, AH, BH, CH, DH)]; + let AltOrderSelect = [{ + return MF.getSubtarget<X86Subtarget>().is64Bit(); + }]; +} +// GR16_NOREX2 - GR16 registers which do not require a REX2 prefix. +def GR16_NOREX2 : RegisterClass<"X86", [i16], 16, + (sub GR16, (sequence "R%uW", 16, 31))>; +// GR32_NOREX2 - GR32 registers which do not require a REX2 prefix. +def GR32_NOREX2 : RegisterClass<"X86", [i32], 32, + (sub GR32, (sequence "R%uD", 16, 31))>; +// GR64_NOREX2 - GR64 registers which do not require a REX2 prefix. +def GR64_NOREX2 : RegisterClass<"X86", [i64], 64, + (sub GR64, (sequence "R%u", 16, 31))>; +} // GR32_NOSP - GR32 registers except ESP. def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)>; @@ -523,6 +687,15 @@ def GR32_NOREX_NOSP : RegisterClass<"X86", [i32], 32, // GR64_NOREX_NOSP - GR64_NOREX registers except RSP. def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64, (and GR64_NOREX, GR64_NOSP)>; +let GeneratePressureSet = 0 in { +// GR32_NOREX2_NOSP - GR32_NOREX2 registers except ESP. +def GR32_NOREX2_NOSP : RegisterClass<"X86", [i32], 32, + (sub GR32_NOREX2, ESP)>; + +// GR64_NOREX2_NOSP - GR64_NOREX2 registers except RSP, RIP. +def GR64_NOREX2_NOSP : RegisterClass<"X86", [i64], 64, + (sub GR64_NOREX2, RSP, RIP)>; +} // Register classes used for ABIs that use 32-bit address accesses, // while using the whole x86_64 ISA. 
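Putting the pieces above together: the register allocator may only hand out r16-r31 when the subtarget has the egpr feature and the instruction's encoding can actually reach them. EVEX instructions always can; pseudos, the XSAVE*/XRSTOR* family, and anything outside legacy map 0/1 cannot, and for those getRegClass narrows GR* to the matching GR*_NOREX2 class. The standalone C++ sketch below models that decision; the enum values and helper names are simplified placeholders, not the real X86II flags or LLVM APIs.

#include <cstdio>

// Simplified stand-ins for the encoding information carried in TSFlags.
enum class Encoding { Legacy, VEX, XOP, EVEX };
enum class OpMap { OB, TB, Other }; // legacy opcode maps: one-byte, 0x0F

struct InstrDesc {
  Encoding Enc;
  OpMap Map;
  bool IsPseudo;
  bool IsXSaveFamily; // XSAVE*/XRSTOR* are excluded explicitly
};

// Models the intent of X86II::canUseApxExtendedReg.
bool canUseApxExtendedReg(const InstrDesc &D) {
  if (D.Enc == Encoding::EVEX)
    return true; // EVEX can always encode an extended GPR
  if (D.IsPseudo)
    return false; // conservative: we do not know the final instruction
  if (D.IsXSaveFamily)
    return false; // explicitly excluded legacy-map instructions
  // Only legacy map 0/1 instructions gain a REX2 form.
  return D.Enc == Encoding::Legacy &&
         (D.Map == OpMap::OB || D.Map == OpMap::TB);
}

// Models the intent of X86InstrInfo::getRegClass for a GR64 operand.
const char *pickGR64Class(const InstrDesc &D, bool HasEGPR) {
  if (!HasEGPR)
    return "GR64"; // r16-r31 are reserved anyway, no narrowing needed
  return canUseApxExtendedReg(D) ? "GR64" : "GR64_NOREX2";
}

int main() {
  InstrDesc Evex{Encoding::EVEX, OpMap::Other, false, false};
  InstrDesc Vex{Encoding::VEX, OpMap::Other, false, false};
  InstrDesc LegacyTB{Encoding::Legacy, OpMap::TB, false, false};
  std::printf("EVEX      -> %s\n", pickGR64Class(Evex, true));     // GR64
  std::printf("VEX       -> %s\n", pickGR64Class(Vex, true));      // GR64_NOREX2
  std::printf("legacy TB -> %s\n", pickGR64Class(LegacyTB, true)); // GR64
}

This mirrors what the no-rex2-general.ll test further down checks: the SSE CVTSD2SI (legacy map 0x0F) keeps a plain gr32 operand, while the VEX-encoded VCVTSD2SI is constrained to gr32_norex2.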
diff --git a/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir b/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir index f92d49cabdcda..2ac4d7cccac07 100644 --- a/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir +++ b/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir @@ -28,8 +28,8 @@ body: | liveins: $rdi, $rsi ; CHECK-LABEL: name: test - ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4521994 /* regdef:GR64 */, def $rsi, 4521994 /* regdef:GR64 */, def dead $rdi, - INLINEASM &foo, 0, 4521994, def $rsi, 4521994, def dead $rdi, 2147549193, killed $rdi, 2147483657, killed $rsi, 12, implicit-def dead early-clobber $eflags + ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4784138 /* regdef:GR64 */, def $rsi, 4784138 /* regdef:GR64 */, def dead $rdi, + INLINEASM &foo, 0, 4784138, def $rsi, 4784138, def dead $rdi, 2147549193, killed $rdi, 2147483657, killed $rsi, 12, implicit-def dead early-clobber $eflags $rax = MOV64rr killed $rsi RET64 killed $rax ... @@ -45,8 +45,8 @@ body: | ; Verify that the register ties are preserved. ; CHECK-LABEL: name: test2 - ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4521994 /* regdef:GR64 */, def $rsi, 4521994 /* regdef:GR64 */, def dead $rdi, 2147549193 /* reguse tiedto:$1 */, killed $rdi(tied-def 5), 2147483657 /* reguse tiedto:$0 */, killed $rsi(tied-def 3), 12 /* clobber */, implicit-def dead early-clobber $eflags - INLINEASM &foo, 0, 4521994, def $rsi, 4521994, def dead $rdi, 2147549193, killed $rdi(tied-def 5), 2147483657, killed $rsi(tied-def 3), 12, implicit-def dead early-clobber $eflags + ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4784138 /* regdef:GR64 */, def $rsi, 4784138 /* regdef:GR64 */, def dead $rdi, 2147549193 /* reguse tiedto:$1 */, killed $rdi(tied-def 5), 2147483657 /* reguse tiedto:$0 */, killed $rsi(tied-def 3), 12 /* clobber */, implicit-def dead early-clobber $eflags + INLINEASM &foo, 0, 4784138, def $rsi, 4784138, def dead $rdi, 2147549193, killed $rdi(tied-def 5), 2147483657, killed $rsi(tied-def 3), 12, implicit-def dead early-clobber $eflags $rax = MOV64rr killed $rsi RET64 killed $rax ... 
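The tests that follow enable the feature purely through -mattr=+egpr in their RUN lines. In the 1024-bit multiply below, r16-r31 become allocatable and soak up much of the register pressure, while the no-rex2-* tests check that instructions without an EVEX or REX2 form fall back to the GR*_NOREX2 classes. The gating itself is the subtarget logic added in X86RegisterInfo.cpp; here is a rough standalone sketch of it, with illustrative names and placeholder values rather than the generated register numbers:

#include <cstdio>

struct SubtargetFlags {
  bool Is64Bit;
  bool HasEGPR;
  bool HasAMXTILE;
  bool HasAVX512;
};

// Models the new getReservedRegs rule: unless the target is 64-bit and has
// the egpr feature, r16-r31 and all of their sub-registers stay reserved.
bool reservesR16ToR31(const SubtargetFlags &ST) {
  return !ST.Is64Bit || !ST.HasEGPR;
}

// Models the intent of getNumSupportedRegs: registers are enumerated as
// pre-AVX512, AVX512, AMX, then APX blocks, so the count stops at the first
// block the target does not support. (The real code returns register numbers
// from X86GenRegisterInfo.inc and also distinguishes plain AVX.)
enum { PreAVX512End = 1, AVX512End, AMXEnd, APXEnd };

int numSupportedRegBlocks(const SubtargetFlags &ST) {
  if (ST.HasEGPR)
    return APXEnd;
  if (ST.HasAMXTILE)
    return AMXEnd;
  if (ST.HasAVX512)
    return AVX512End;
  return PreAVX512End;
}

int main() {
  SubtargetFlags Apx{true, true, true, true};
  SubtargetFlags Legacy{true, false, false, false};
  std::printf("APX target: reserves r16-r31? %s, blocks=%d\n",
              reservesR16ToR31(Apx) ? "yes" : "no", numSupportedRegBlocks(Apx));
  std::printf("legacy target: reserves r16-r31? %s, blocks=%d\n",
              reservesR16ToR31(Legacy) ? "yes" : "no",
              numSupportedRegBlocks(Legacy));
}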
diff --git a/llvm/test/CodeGen/X86/apx/mul-i1024.ll b/llvm/test/CodeGen/X86/apx/mul-i1024.ll new file mode 100644 index 0000000000000..3bffd02cbbab9 --- /dev/null +++ b/llvm/test/CodeGen/X86/apx/mul-i1024.ll @@ -0,0 +1,1039 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+egpr | FileCheck %s --check-prefix=APX + +define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind { +; APX-LABEL: test_1024: +; APX: # %bb.0: +; APX-NEXT: pushq %rbp +; APX-NEXT: pushq %r15 +; APX-NEXT: pushq %r14 +; APX-NEXT: pushq %r13 +; APX-NEXT: pushq %r12 +; APX-NEXT: pushq %rbx +; APX-NEXT: subq $104, %rsp +; APX-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: movq %rdi, %r24 +; APX-NEXT: movq (%rdi), %r13 +; APX-NEXT: movq 8(%rdi), %r18 +; APX-NEXT: movq 24(%rdi), %r29 +; APX-NEXT: movq 16(%rdi), %r17 +; APX-NEXT: movq 40(%rdi), %rdi +; APX-NEXT: movq 32(%r24), %r10 +; APX-NEXT: movq 56(%r24), %r15 +; APX-NEXT: movq 48(%r24), %r12 +; APX-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: movq 24(%rsi), %r23 +; APX-NEXT: movq 16(%rsi), %r11 +; APX-NEXT: movq (%rsi), %r27 +; APX-NEXT: movq 8(%rsi), %r14 +; APX-NEXT: movq %r12, %rax +; APX-NEXT: mulq %r27 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %r19 +; APX-NEXT: movq %r15, %rax +; APX-NEXT: mulq %r27 +; APX-NEXT: movq %rdx, %r9 +; APX-NEXT: movq %rax, %r16 +; APX-NEXT: addq %r8, %r16 +; APX-NEXT: adcq $0, %r9 +; APX-NEXT: movq %r12, %rax +; APX-NEXT: mulq %r14 +; APX-NEXT: movq %rdx, %r20 +; APX-NEXT: movq %rax, %r8 +; APX-NEXT: addq %r16, %r8 +; APX-NEXT: adcq %r9, %r20 +; APX-NEXT: setb %al +; APX-NEXT: movzbl %al, %ecx +; APX-NEXT: movq %r15, %rax +; APX-NEXT: mulq %r14 +; APX-NEXT: movq %rdx, %r9 +; APX-NEXT: movq %rax, %r16 +; APX-NEXT: addq %r20, %r16 +; APX-NEXT: adcq %rcx, %r9 +; APX-NEXT: movq %r10, %rax +; APX-NEXT: mulq %r27 +; APX-NEXT: movq %rdx, %r20 +; APX-NEXT: movq %rax, %r25 +; APX-NEXT: movq %rdi, %rax +; APX-NEXT: mulq %r27 +; APX-NEXT: movq %rdx, %r21 +; APX-NEXT: movq %rax, %r22 +; APX-NEXT: addq %r20, %r22 +; APX-NEXT: adcq $0, %r21 +; APX-NEXT: movq %r10, %rax +; APX-NEXT: mulq %r14 +; APX-NEXT: movq %rdx, %r20 +; APX-NEXT: movq %rax, %r28 +; APX-NEXT: addq %r22, %r28 +; APX-NEXT: adcq %r21, %r20 +; APX-NEXT: setb %al +; APX-NEXT: movzbl %al, %ecx +; APX-NEXT: movq %rdi, %rax +; APX-NEXT: mulq %r14 +; APX-NEXT: movq %rdx, %r21 +; APX-NEXT: movq %rax, %r22 +; APX-NEXT: addq %r20, %r22 +; APX-NEXT: adcq %rcx, %r21 +; APX-NEXT: addq %r19, %r22 +; APX-NEXT: adcq %r8, %r21 +; APX-NEXT: adcq $0, %r16 +; APX-NEXT: adcq $0, %r9 +; APX-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: movq %r10, %rax +; APX-NEXT: mulq %r11 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %r30 +; APX-NEXT: movq %rdi, %rax +; APX-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: mulq %r11 +; APX-NEXT: movq %rdx, %r19 +; APX-NEXT: movq %rax, %r20 +; APX-NEXT: addq %r8, %r20 +; APX-NEXT: adcq $0, %r19 +; APX-NEXT: movq %r10, %rax +; APX-NEXT: mulq %r23 +; APX-NEXT: movq %rdx, %rbx +; APX-NEXT: movq %rax, %r31 +; APX-NEXT: addq %r20, %r31 +; APX-NEXT: adcq %r19, %rbx +; APX-NEXT: setb %al +; APX-NEXT: movzbl %al, %ecx +; APX-NEXT: movq %rdi, %rax +; APX-NEXT: mulq %r23 +; APX-NEXT: movq %rdx, %r26 +; APX-NEXT: movq %rax, %r8 +; APX-NEXT: addq %rbx, %r8 +; APX-NEXT: adcq %rcx, %r26 +; APX-NEXT: addq %r22, %r30 +; APX-NEXT: adcq %r21, %r31 +; APX-NEXT: adcq $0, %r8 +; APX-NEXT: adcq $0, %r26 +; 
APX-NEXT: addq %r16, %r8 +; APX-NEXT: adcq %r9, %r26 +; APX-NEXT: setb %al +; APX-NEXT: movzbl %al, %ecx +; APX-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: movq %r12, %rax +; APX-NEXT: mulq %r11 +; APX-NEXT: movq %rdx, %r9 +; APX-NEXT: movq %rax, %rsi +; APX-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: movq %r15, %rax +; APX-NEXT: mulq %r11 +; APX-NEXT: movq %rdx, %r16 +; APX-NEXT: movq %rax, %r21 +; APX-NEXT: addq %r9, %r21 +; APX-NEXT: adcq $0, %r16 +; APX-NEXT: movq %r12, %rax +; APX-NEXT: mulq %r23 +; APX-NEXT: movq %rdx, %r9 +; APX-NEXT: movq %rax, %rdi +; APX-NEXT: addq %r21, %rdi +; APX-NEXT: adcq %r16, %r9 +; APX-NEXT: setb %al +; APX-NEXT: movzbl %al, %r10d +; APX-NEXT: movq %r15, %rax +; APX-NEXT: mulq %r23 +; APX-NEXT: movq %rdx, %r21 +; APX-NEXT: movq %rax, %r22 +; APX-NEXT: addq %r9, %r22 +; APX-NEXT: adcq %r10, %r21 +; APX-NEXT: addq %r8, %rsi +; APX-NEXT: movq %rsi, %r19 +; APX-NEXT: adcq %r26, %rdi +; APX-NEXT: adcq %rcx, %r22 +; APX-NEXT: adcq $0, %r21 +; APX-NEXT: movq %r17, %rax +; APX-NEXT: mulq %r27 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %rbx +; APX-NEXT: movq %r29, %rax +; APX-NEXT: mulq %r27 +; APX-NEXT: movq %rdx, %r9 +; APX-NEXT: movq %rax, %r16 +; APX-NEXT: addq %r8, %r16 +; APX-NEXT: adcq $0, %r9 +; APX-NEXT: movq %r17, %rax +; APX-NEXT: mulq %r14 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %r26 +; APX-NEXT: addq %r16, %r26 +; APX-NEXT: adcq %r9, %r8 +; APX-NEXT: setb %al +; APX-NEXT: movzbl %al, %ecx +; APX-NEXT: movq %r29, %rax +; APX-NEXT: mulq %r14 +; APX-NEXT: movq %r14, %rsi +; APX-NEXT: movq %rdx, %r9 +; APX-NEXT: movq %rax, %r16 +; APX-NEXT: addq %r8, %r16 +; APX-NEXT: adcq %rcx, %r9 +; APX-NEXT: movq %r13, %rax +; APX-NEXT: mulq %r27 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: movq %r18, %rax +; APX-NEXT: mulq %r27 +; APX-NEXT: movq %rdx, %r14 +; APX-NEXT: movq %rax, %r15 +; APX-NEXT: addq %r8, %r15 +; APX-NEXT: adcq $0, %r14 +; APX-NEXT: movq %r13, %rax +; APX-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: mulq %rsi +; APX-NEXT: movq %rdx, %r12 +; APX-NEXT: addq %r15, %rax +; APX-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: adcq %r14, %r12 +; APX-NEXT: setb %cl +; APX-NEXT: movq %r18, %rax +; APX-NEXT: mulq %rsi +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %r15 +; APX-NEXT: addq %r12, %r15 +; APX-NEXT: movzbl %cl, %eax +; APX-NEXT: adcq %rax, %r8 +; APX-NEXT: addq %rbx, %r15 +; APX-NEXT: adcq %r26, %r8 +; APX-NEXT: adcq $0, %r16 +; APX-NEXT: adcq $0, %r9 +; APX-NEXT: movq %r13, %rax +; APX-NEXT: mulq %r11 +; APX-NEXT: movq %rdx, %r26 +; APX-NEXT: movq %rax, %rsi +; APX-NEXT: movq %r18, %rax +; APX-NEXT: mulq %r11 +; APX-NEXT: movq %rdx, %rbx +; APX-NEXT: movq %rax, %r14 +; APX-NEXT: addq %r26, %r14 +; APX-NEXT: adcq $0, %rbx +; APX-NEXT: movq %r13, %rax +; APX-NEXT: mulq %r23 +; APX-NEXT: movq %rdx, %r12 +; APX-NEXT: addq %r14, %rax +; APX-NEXT: movq %rax, %r10 +; APX-NEXT: adcq %rbx, %r12 +; APX-NEXT: setb %cl +; APX-NEXT: movq %r18, %rax +; APX-NEXT: mulq %r23 +; APX-NEXT: movq %rdx, %r14 +; APX-NEXT: movq %rax, %r26 +; APX-NEXT: addq %r12, %r26 +; APX-NEXT: movzbl %cl, %eax +; APX-NEXT: adcq %rax, %r14 +; APX-NEXT: addq %r15, %rsi +; APX-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: adcq %r8, %r10 +; APX-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: adcq $0, %r26 +; APX-NEXT: adcq $0, %r14 +; APX-NEXT: addq %r16, %r26 
+; APX-NEXT: adcq %r9, %r14 +; APX-NEXT: setb %cl +; APX-NEXT: movq %r17, %rax +; APX-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: mulq %r11 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %rbx +; APX-NEXT: movq %r29, %rax +; APX-NEXT: mulq %r11 +; APX-NEXT: movq %rdx, %r9 +; APX-NEXT: movq %rax, %r16 +; APX-NEXT: addq %r8, %r16 +; APX-NEXT: adcq $0, %r9 +; APX-NEXT: movq %r17, %rax +; APX-NEXT: mulq %r23 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %r15 +; APX-NEXT: addq %r16, %r15 +; APX-NEXT: adcq %r9, %r8 +; APX-NEXT: setb %r9b +; APX-NEXT: movq %r29, %rax +; APX-NEXT: mulq %r23 +; APX-NEXT: movq %rdx, %r12 +; APX-NEXT: movq %rax, %rbp +; APX-NEXT: addq %r8, %rbp +; APX-NEXT: movzbl %r9b, %eax +; APX-NEXT: adcq %rax, %r12 +; APX-NEXT: addq %r26, %rbx +; APX-NEXT: adcq %r14, %r15 +; APX-NEXT: movzbl %cl, %eax +; APX-NEXT: adcq %rax, %rbp +; APX-NEXT: adcq $0, %r12 +; APX-NEXT: addq %r25, %rbx +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; APX-NEXT: movq 32(%rsi), %r25 +; APX-NEXT: adcq %r28, %r15 +; APX-NEXT: adcq %r30, %rbp +; APX-NEXT: adcq %r31, %r12 +; APX-NEXT: adcq $0, %r19 +; APX-NEXT: movq %r19, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: adcq $0, %rdi +; APX-NEXT: adcq $0, %r22 +; APX-NEXT: adcq $0, %r21 +; APX-NEXT: movq %r17, %rax +; APX-NEXT: mulq %r25 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %r28 +; APX-NEXT: movq %r29, %rax +; APX-NEXT: mulq %r25 +; APX-NEXT: movq %rdx, %r9 +; APX-NEXT: movq %rax, %r16 +; APX-NEXT: addq %r8, %r16 +; APX-NEXT: adcq $0, %r9 +; APX-NEXT: movq 40(%rsi), %rcx +; APX-NEXT: movq %r17, %rax +; APX-NEXT: mulq %rcx +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %r26 +; APX-NEXT: addq %r16, %r26 +; APX-NEXT: adcq %r9, %r8 +; APX-NEXT: setb %r10b +; APX-NEXT: movq %r29, %rax +; APX-NEXT: mulq %rcx +; APX-NEXT: movq %rdx, %r9 +; APX-NEXT: movq %rax, %r16 +; APX-NEXT: addq %r8, %r16 +; APX-NEXT: movzbl %r10b, %eax +; APX-NEXT: adcq %rax, %r9 +; APX-NEXT: movq %r13, %rax +; APX-NEXT: mulq %r25 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %r19 +; APX-NEXT: movq %r18, %rax +; APX-NEXT: mulq %r25 +; APX-NEXT: movq %rdx, %r30 +; APX-NEXT: movq %rax, %r31 +; APX-NEXT: addq %r8, %r31 +; APX-NEXT: adcq $0, %r30 +; APX-NEXT: movq %r13, %rax +; APX-NEXT: mulq %rcx +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %r20 +; APX-NEXT: addq %r31, %r20 +; APX-NEXT: adcq %r30, %r8 +; APX-NEXT: setb %r10b +; APX-NEXT: movq %r18, %rax +; APX-NEXT: mulq %rcx +; APX-NEXT: movq %rdx, %r30 +; APX-NEXT: movq %rax, %r31 +; APX-NEXT: addq %r8, %r31 +; APX-NEXT: movzbl %r10b, %eax +; APX-NEXT: adcq %rax, %r30 +; APX-NEXT: addq %r28, %r31 +; APX-NEXT: adcq %r26, %r30 +; APX-NEXT: adcq $0, %r16 +; APX-NEXT: adcq $0, %r9 +; APX-NEXT: movq 48(%rsi), %r28 +; APX-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: movq %r13, %rax +; APX-NEXT: mulq %r28 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %r11 +; APX-NEXT: movq %r18, %rax +; APX-NEXT: movq %r18, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: mulq %r28 +; APX-NEXT: movq %rdx, %r26 +; APX-NEXT: movq %rax, %r14 +; APX-NEXT: addq %r8, %r14 +; APX-NEXT: adcq $0, %r26 +; APX-NEXT: movq 56(%rsi), %r10 +; APX-NEXT: movq %r13, %rax +; APX-NEXT: mulq %r10 +; APX-NEXT: movq %rdx, %r13 +; APX-NEXT: addq %r14, %rax +; APX-NEXT: movq %rax, %r14 +; APX-NEXT: adcq %r26, %r13 +; APX-NEXT: setb %sil +; APX-NEXT: movq %r18, %rax +; APX-NEXT: mulq %r10 +; APX-NEXT: movq %rdx, %r26 +; APX-NEXT: movq %rax, %r8 +; APX-NEXT: addq 
%r13, %r8 +; APX-NEXT: movzbl %sil, %eax +; APX-NEXT: adcq %rax, %r26 +; APX-NEXT: addq %r31, %r11 +; APX-NEXT: adcq %r30, %r14 +; APX-NEXT: adcq $0, %r8 +; APX-NEXT: adcq $0, %r26 +; APX-NEXT: addq %r16, %r8 +; APX-NEXT: adcq %r9, %r26 +; APX-NEXT: setb %r18b +; APX-NEXT: movq %r17, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: movq %r17, %rax +; APX-NEXT: mulq %r28 +; APX-NEXT: movq %rdx, %r9 +; APX-NEXT: movq %rax, %r30 +; APX-NEXT: movq %r29, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: movq %r29, %rax +; APX-NEXT: mulq %r28 +; APX-NEXT: movq %rdx, %r16 +; APX-NEXT: movq %rax, %r31 +; APX-NEXT: addq %r9, %r31 +; APX-NEXT: adcq $0, %r16 +; APX-NEXT: movq %r17, %rax +; APX-NEXT: mulq %r10 +; APX-NEXT: movq %rdx, %r9 +; APX-NEXT: movq %rax, %r17 +; APX-NEXT: addq %r31, %r17 +; APX-NEXT: adcq %r16, %r9 +; APX-NEXT: setb %r16b +; APX-NEXT: movq %r29, %rax +; APX-NEXT: mulq %r10 +; APX-NEXT: movq %rdx, %r13 +; APX-NEXT: movq %rax, %r31 +; APX-NEXT: addq %r9, %r31 +; APX-NEXT: movzbl %r16b, %eax +; APX-NEXT: adcq %rax, %r13 +; APX-NEXT: addq %r8, %r30 +; APX-NEXT: adcq %r26, %r17 +; APX-NEXT: movzbl %r18b, %eax +; APX-NEXT: adcq %rax, %r31 +; APX-NEXT: adcq $0, %r13 +; APX-NEXT: addq %rbx, %r19 +; APX-NEXT: movq %r19, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: adcq %r15, %r20 +; APX-NEXT: movq %r20, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: adcq %rbp, %r11 +; APX-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: adcq %r12, %r14 +; APX-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: adcq $0, %r30 +; APX-NEXT: adcq $0, %r17 +; APX-NEXT: adcq $0, %r31 +; APX-NEXT: adcq $0, %r13 +; APX-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r30 # 8-byte Folded Reload +; APX-NEXT: adcq %rdi, %r17 +; APX-NEXT: adcq %r22, %r31 +; APX-NEXT: adcq %r21, %r13 +; APX-NEXT: setb %r15b +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; APX-NEXT: movq %rsi, %rax +; APX-NEXT: mulq %r25 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %r19 +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r21 # 8-byte Reload +; APX-NEXT: movq %r21, %rax +; APX-NEXT: mulq %r25 +; APX-NEXT: movq %rdx, %r9 +; APX-NEXT: movq %rax, %r16 +; APX-NEXT: addq %r8, %r16 +; APX-NEXT: adcq $0, %r9 +; APX-NEXT: movq %rsi, %rax +; APX-NEXT: movq %rsi, %r29 +; APX-NEXT: mulq %rcx +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %r20 +; APX-NEXT: addq %r16, %r20 +; APX-NEXT: adcq %r9, %r8 +; APX-NEXT: setb %r18b +; APX-NEXT: movq %r21, %rax +; APX-NEXT: movq %r21, %r14 +; APX-NEXT: mulq %rcx +; APX-NEXT: movq %rdx, %r9 +; APX-NEXT: movq %rax, %r16 +; APX-NEXT: addq %r8, %r16 +; APX-NEXT: movzbl %r18b, %eax +; APX-NEXT: adcq %rax, %r9 +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; APX-NEXT: movq %rbx, %rax +; APX-NEXT: mulq %r25 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %rdi +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; APX-NEXT: movq %rsi, %rax +; APX-NEXT: mulq %r25 +; APX-NEXT: movq %rdx, %r21 +; APX-NEXT: movq %rax, %r22 +; APX-NEXT: addq %r8, %r22 +; APX-NEXT: adcq $0, %r21 +; APX-NEXT: movq %rbx, %rax +; APX-NEXT: mulq %rcx +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: addq %r22, %rax +; APX-NEXT: movq %rax, %r11 +; APX-NEXT: adcq %r21, %r8 +; APX-NEXT: setb %r18b +; APX-NEXT: movq %rsi, %rax +; APX-NEXT: movq %rsi, %r21 +; APX-NEXT: mulq %rcx +; APX-NEXT: movq %rdx, %r22 +; APX-NEXT: movq %rax, %r26 +; APX-NEXT: addq %r8, %r26 +; APX-NEXT: movzbl %r18b, %eax +; APX-NEXT: adcq %rax, %r22 +; APX-NEXT: addq %r19, 
%r26 +; APX-NEXT: adcq %r20, %r22 +; APX-NEXT: adcq $0, %r16 +; APX-NEXT: adcq $0, %r9 +; APX-NEXT: movq %rbx, %rax +; APX-NEXT: mulq %r28 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %rsi +; APX-NEXT: movq %r21, %rax +; APX-NEXT: mulq %r28 +; APX-NEXT: movq %rdx, %r19 +; APX-NEXT: movq %rax, %r20 +; APX-NEXT: addq %r8, %r20 +; APX-NEXT: adcq $0, %r19 +; APX-NEXT: movq %rbx, %rax +; APX-NEXT: mulq %r10 +; APX-NEXT: movq %rdx, %rbx +; APX-NEXT: addq %r20, %rax +; APX-NEXT: movq %rax, %r20 +; APX-NEXT: adcq %r19, %rbx +; APX-NEXT: setb %r18b +; APX-NEXT: movq %r21, %rax +; APX-NEXT: mulq %r10 +; APX-NEXT: movq %rdx, %r21 +; APX-NEXT: movq %rax, %r8 +; APX-NEXT: addq %rbx, %r8 +; APX-NEXT: movzbl %r18b, %eax +; APX-NEXT: adcq %rax, %r21 +; APX-NEXT: addq %r26, %rsi +; APX-NEXT: adcq %r22, %r20 +; APX-NEXT: adcq $0, %r8 +; APX-NEXT: adcq $0, %r21 +; APX-NEXT: addq %r16, %r8 +; APX-NEXT: adcq %r9, %r21 +; APX-NEXT: setb %r18b +; APX-NEXT: movq %r29, %rax +; APX-NEXT: mulq %r28 +; APX-NEXT: movq %rdx, %r9 +; APX-NEXT: movq %rax, %r22 +; APX-NEXT: movq %r14, %rax +; APX-NEXT: mulq %r28 +; APX-NEXT: movq %rdx, %r16 +; APX-NEXT: movq %rax, %r19 +; APX-NEXT: addq %r9, %r19 +; APX-NEXT: adcq $0, %r16 +; APX-NEXT: movq %r29, %rax +; APX-NEXT: mulq %r10 +; APX-NEXT: movq %rdx, %r9 +; APX-NEXT: addq %r19, %rax +; APX-NEXT: movq %rax, %r19 +; APX-NEXT: adcq %r16, %r9 +; APX-NEXT: setb %r16b +; APX-NEXT: movq %r14, %rax +; APX-NEXT: mulq %r10 +; APX-NEXT: movq %rdx, %rbp +; APX-NEXT: movq %rax, %r12 +; APX-NEXT: addq %r9, %r12 +; APX-NEXT: movzbl %r16b, %eax +; APX-NEXT: adcq %rax, %rbp +; APX-NEXT: addq %r8, %r22 +; APX-NEXT: adcq %r21, %r19 +; APX-NEXT: movzbl %r18b, %eax +; APX-NEXT: adcq %rax, %r12 +; APX-NEXT: adcq $0, %rbp +; APX-NEXT: addq %r30, %rdi +; APX-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: adcq %r17, %r11 +; APX-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: adcq %r31, %rsi +; APX-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: adcq %r13, %r20 +; APX-NEXT: movq %r20, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: movzbl %r15b, %eax +; APX-NEXT: adcq %rax, %r22 +; APX-NEXT: movq %r22, (%rsp) # 8-byte Spill +; APX-NEXT: adcq $0, %r19 +; APX-NEXT: movq %r19, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: adcq $0, %r12 +; APX-NEXT: adcq $0, %rbp +; APX-NEXT: movq 64(%r24), %r21 +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; APX-NEXT: movq %rdi, %rax +; APX-NEXT: mulq %r21 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %r22 +; APX-NEXT: movq %r23, %rax +; APX-NEXT: mulq %r21 +; APX-NEXT: movq %rdx, %r9 +; APX-NEXT: movq %rax, %r16 +; APX-NEXT: addq %r8, %r16 +; APX-NEXT: adcq $0, %r9 +; APX-NEXT: movq 72(%r24), %r30 +; APX-NEXT: movq %rdi, %rax +; APX-NEXT: mulq %r30 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %r26 +; APX-NEXT: addq %r16, %r26 +; APX-NEXT: adcq %r9, %r8 +; APX-NEXT: setb %r18b +; APX-NEXT: movq %r23, %rax +; APX-NEXT: mulq %r30 +; APX-NEXT: movq %rdx, %r9 +; APX-NEXT: movq %rax, %r16 +; APX-NEXT: addq %r8, %r16 +; APX-NEXT: movzbl %r18b, %eax +; APX-NEXT: adcq %rax, %r9 +; APX-NEXT: movq %r27, %rax +; APX-NEXT: mulq %r21 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; APX-NEXT: movq %r11, %rax +; APX-NEXT: mulq %r21 +; APX-NEXT: movq %rdx, %r31 +; APX-NEXT: movq %rax, %rbx +; APX-NEXT: addq %r8, %rbx +; APX-NEXT: adcq $0, %r31 +; APX-NEXT: movq 
%r27, %rax +; APX-NEXT: mulq %r30 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: addq %rbx, %rax +; APX-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: adcq %r31, %r8 +; APX-NEXT: setb %r18b +; APX-NEXT: movq %r11, %rax +; APX-NEXT: mulq %r30 +; APX-NEXT: movq %rdx, %r31 +; APX-NEXT: movq %rax, %rbx +; APX-NEXT: addq %r8, %rbx +; APX-NEXT: movzbl %r18b, %eax +; APX-NEXT: adcq %rax, %r31 +; APX-NEXT: addq %r22, %rbx +; APX-NEXT: adcq %r26, %r31 +; APX-NEXT: adcq $0, %r16 +; APX-NEXT: adcq $0, %r9 +; APX-NEXT: movq 80(%r24), %r13 +; APX-NEXT: movq %r27, %rax +; APX-NEXT: mulq %r13 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %rsi +; APX-NEXT: movq %r11, %rax +; APX-NEXT: mulq %r13 +; APX-NEXT: movq %rdx, %r26 +; APX-NEXT: movq %rax, %r14 +; APX-NEXT: addq %r8, %r14 +; APX-NEXT: adcq $0, %r26 +; APX-NEXT: movq 88(%r24), %r18 +; APX-NEXT: movq %r27, %rax +; APX-NEXT: mulq %r18 +; APX-NEXT: movq %rdx, %r15 +; APX-NEXT: movq %rax, %r22 +; APX-NEXT: addq %r14, %r22 +; APX-NEXT: adcq %r26, %r15 +; APX-NEXT: setb %r14b +; APX-NEXT: movq %r11, %rax +; APX-NEXT: mulq %r18 +; APX-NEXT: movq %rdx, %r26 +; APX-NEXT: movq %rax, %r8 +; APX-NEXT: addq %r15, %r8 +; APX-NEXT: movzbl %r14b, %eax +; APX-NEXT: adcq %rax, %r26 +; APX-NEXT: addq %rbx, %rsi +; APX-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; APX-NEXT: adcq %r31, %r22 +; APX-NEXT: adcq $0, %r8 +; APX-NEXT: adcq $0, %r26 +; APX-NEXT: addq %r16, %r8 +; APX-NEXT: adcq %r9, %r26 +; APX-NEXT: setb %r31b +; APX-NEXT: movq %rdi, %rax +; APX-NEXT: mulq %r13 +; APX-NEXT: movq %rdx, %r9 +; APX-NEXT: movq %rax, %rsi +; APX-NEXT: movq %r23, %rax +; APX-NEXT: mulq %r13 +; APX-NEXT: movq %rdx, %r16 +; APX-NEXT: movq %rax, %r14 +; APX-NEXT: addq %r9, %r14 +; APX-NEXT: adcq $0, %r16 +; APX-NEXT: movq %rdi, %rax +; APX-NEXT: mulq %r18 +; APX-NEXT: movq %rdx, %r9 +; APX-NEXT: movq %rax, %rbx +; APX-NEXT: addq %r14, %rbx +; APX-NEXT: adcq %r16, %r9 +; APX-NEXT: setb %r16b +; APX-NEXT: movq %r23, %rax +; APX-NEXT: mulq %r18 +; APX-NEXT: movq %rdx, %r14 +; APX-NEXT: movq %rax, %r15 +; APX-NEXT: addq %r9, %r15 +; APX-NEXT: movzbl %r16b, %eax +; APX-NEXT: adcq %rax, %r14 +; APX-NEXT: addq %r8, %rsi +; APX-NEXT: adcq %r26, %rbx +; APX-NEXT: movzbl %r31b, %eax +; APX-NEXT: adcq %rax, %r15 +; APX-NEXT: adcq $0, %r14 +; APX-NEXT: imulq %r25, %r18 +; APX-NEXT: movq %r25, %rax +; APX-NEXT: mulq %r13 +; APX-NEXT: movq %rax, %r8 +; APX-NEXT: addq %r18, %rdx +; APX-NEXT: imulq %rcx, %r13 +; APX-NEXT: addq %rdx, %r13 +; APX-NEXT: movq %r28, %r9 +; APX-NEXT: imulq %r30, %r9 +; APX-NEXT: movq %r28, %rax +; APX-NEXT: mulq %r21 +; APX-NEXT: movq %rax, %r26 +; APX-NEXT: addq %r9, %rdx +; APX-NEXT: imulq %r21, %r10 +; APX-NEXT: addq %rdx, %r10 +; APX-NEXT: addq %r8, %r26 +; APX-NEXT: adcq %r13, %r10 +; APX-NEXT: movq %r21, %rax +; APX-NEXT: mulq %r25 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %r9 +; APX-NEXT: movq %r30, %rax +; APX-NEXT: mulq %r25 +; APX-NEXT: movq %rdx, %r25 +; APX-NEXT: movq %rax, %r28 +; APX-NEXT: addq %r8, %r28 +; APX-NEXT: adcq $0, %r25 +; APX-NEXT: movq %r21, %rax +; APX-NEXT: mulq %rcx +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %r16 +; APX-NEXT: addq %r28, %r16 +; APX-NEXT: adcq %r25, %r8 +; APX-NEXT: setb %r18b +; APX-NEXT: movq %r30, %rax +; APX-NEXT: mulq %rcx +; APX-NEXT: movq %rdx, %r21 +; APX-NEXT: movq %rax, %r28 +; APX-NEXT: addq %r8, %r28 +; APX-NEXT: movzbl %r18b, %eax +; APX-NEXT: adcq %rax, %r21 +; APX-NEXT: addq %r26, %r28 +; APX-NEXT: adcq %r10, %r21 +; APX-NEXT: movq 112(%r24), %rcx +; 
APX-NEXT: movq %r27, %rax +; APX-NEXT: mulq %rcx +; APX-NEXT: movq %rax, %r8 +; APX-NEXT: imulq %r11, %rcx +; APX-NEXT: addq %rdx, %rcx +; APX-NEXT: movq 120(%r24), %rax +; APX-NEXT: imulq %r27, %rax +; APX-NEXT: addq %rax, %rcx +; APX-NEXT: movq 96(%r24), %r25 +; APX-NEXT: movq 104(%r24), %r26 +; APX-NEXT: movq %rdi, %rax +; APX-NEXT: imulq %r26, %rdi +; APX-NEXT: mulq %r25 +; APX-NEXT: movq %rax, %r29 +; APX-NEXT: addq %rdi, %rdx +; APX-NEXT: imulq %r25, %r23 +; APX-NEXT: addq %rdx, %r23 +; APX-NEXT: addq %r8, %r29 +; APX-NEXT: adcq %rcx, %r23 +; APX-NEXT: movq %r25, %rax +; APX-NEXT: mulq %r27 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %r20 +; APX-NEXT: movq %r26, %rax +; APX-NEXT: mulq %r27 +; APX-NEXT: movq %rdx, %r27 +; APX-NEXT: movq %rax, %r30 +; APX-NEXT: addq %r8, %r30 +; APX-NEXT: adcq $0, %r27 +; APX-NEXT: movq %r25, %rax +; APX-NEXT: mulq %r11 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %r25 +; APX-NEXT: addq %r30, %r25 +; APX-NEXT: adcq %r27, %r8 +; APX-NEXT: setb %cl +; APX-NEXT: movq %r26, %rax +; APX-NEXT: mulq %r11 +; APX-NEXT: movq %rdx, %r24 +; APX-NEXT: movq %rax, %r27 +; APX-NEXT: addq %r8, %r27 +; APX-NEXT: movzbl %cl, %eax +; APX-NEXT: adcq %rax, %r24 +; APX-NEXT: addq %r29, %r27 +; APX-NEXT: adcq %r23, %r24 +; APX-NEXT: addq %r9, %r20 +; APX-NEXT: adcq %r16, %r25 +; APX-NEXT: adcq %r28, %r27 +; APX-NEXT: adcq %r21, %r24 +; APX-NEXT: addq %rsi, %r20 +; APX-NEXT: adcq %rbx, %r25 +; APX-NEXT: adcq %r15, %r27 +; APX-NEXT: adcq %r14, %r24 +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; APX-NEXT: movq 80(%r11), %rbx +; APX-NEXT: movq %rbx, %rax +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r19 # 8-byte Reload +; APX-NEXT: mulq %r19 +; APX-NEXT: movq %rax, %r21 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq 88(%r11), %r28 +; APX-NEXT: movq %r28, %rax +; APX-NEXT: mulq %r19 +; APX-NEXT: movq %rdx, %r9 +; APX-NEXT: movq %rax, %r16 +; APX-NEXT: addq %r8, %r16 +; APX-NEXT: adcq $0, %r9 +; APX-NEXT: movq %rbx, %rax +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r17 # 8-byte Reload +; APX-NEXT: mulq %r17 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %r26 +; APX-NEXT: addq %r16, %r26 +; APX-NEXT: adcq %r9, %r8 +; APX-NEXT: setb %cl +; APX-NEXT: movq %r28, %rax +; APX-NEXT: mulq %r17 +; APX-NEXT: movq %rdx, %r9 +; APX-NEXT: movq %rax, %r16 +; APX-NEXT: addq %r8, %r16 +; APX-NEXT: movzbl %cl, %eax +; APX-NEXT: adcq %rax, %r9 +; APX-NEXT: movq 64(%r11), %r15 +; APX-NEXT: movq %r15, %rax +; APX-NEXT: mulq %r19 +; APX-NEXT: movq %rax, %r23 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq 72(%r11), %r14 +; APX-NEXT: movq %r14, %rax +; APX-NEXT: mulq %r19 +; APX-NEXT: movq %rdx, %r30 +; APX-NEXT: movq %rax, %r31 +; APX-NEXT: addq %r8, %r31 +; APX-NEXT: adcq $0, %r30 +; APX-NEXT: movq %r15, %rax +; APX-NEXT: mulq %r17 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %r29 +; APX-NEXT: addq %r31, %r29 +; APX-NEXT: adcq %r30, %r8 +; APX-NEXT: setb %cl +; APX-NEXT: movq %r14, %rax +; APX-NEXT: mulq %r17 +; APX-NEXT: movq %rdx, %r31 +; APX-NEXT: movq %rax, %r13 +; APX-NEXT: addq %r8, %r13 +; APX-NEXT: movzbl %cl, %eax +; APX-NEXT: adcq %rax, %r31 +; APX-NEXT: addq %r21, %r13 +; APX-NEXT: adcq %r26, %r31 +; APX-NEXT: adcq $0, %r16 +; APX-NEXT: adcq $0, %r9 +; APX-NEXT: movq %r15, %rax +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; APX-NEXT: mulq %rdi +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %r30 +; APX-NEXT: movq %r14, %rax +; APX-NEXT: mulq %rdi +; APX-NEXT: movq %rdx, %r26 +; APX-NEXT: movq %rax, %rcx +; APX-NEXT: addq 
%r8, %rcx +; APX-NEXT: adcq $0, %r26 +; APX-NEXT: movq %r15, %rax +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r18 # 8-byte Reload +; APX-NEXT: mulq %r18 +; APX-NEXT: movq %rdx, %r10 +; APX-NEXT: movq %rax, %r21 +; APX-NEXT: addq %rcx, %r21 +; APX-NEXT: adcq %r26, %r10 +; APX-NEXT: setb %cl +; APX-NEXT: movq %r14, %rax +; APX-NEXT: mulq %r18 +; APX-NEXT: movq %rdx, %r26 +; APX-NEXT: movq %rax, %r8 +; APX-NEXT: addq %r10, %r8 +; APX-NEXT: movzbl %cl, %eax +; APX-NEXT: adcq %rax, %r26 +; APX-NEXT: addq %r13, %r30 +; APX-NEXT: adcq %r31, %r21 +; APX-NEXT: adcq $0, %r8 +; APX-NEXT: adcq $0, %r26 +; APX-NEXT: addq %r16, %r8 +; APX-NEXT: adcq %r9, %r26 +; APX-NEXT: setb %sil +; APX-NEXT: movq %rbx, %rax +; APX-NEXT: mulq %rdi +; APX-NEXT: movq %rdx, %rcx +; APX-NEXT: movq %rax, %r31 +; APX-NEXT: movq %r28, %rax +; APX-NEXT: mulq %rdi +; APX-NEXT: movq %rdx, %r9 +; APX-NEXT: movq %rax, %r10 +; APX-NEXT: addq %rcx, %r10 +; APX-NEXT: adcq $0, %r9 +; APX-NEXT: movq %rbx, %rax +; APX-NEXT: mulq %r18 +; APX-NEXT: movq %rdx, %rcx +; APX-NEXT: movq %rax, %r13 +; APX-NEXT: addq %r10, %r13 +; APX-NEXT: adcq %r9, %rcx +; APX-NEXT: setb %r10b +; APX-NEXT: movq %r28, %rax +; APX-NEXT: mulq %r18 +; APX-NEXT: movq %rdx, %r16 +; APX-NEXT: movq %rax, %r9 +; APX-NEXT: addq %rcx, %r9 +; APX-NEXT: movzbl %r10b, %eax +; APX-NEXT: adcq %rax, %r16 +; APX-NEXT: addq %r8, %r31 +; APX-NEXT: adcq %r26, %r13 +; APX-NEXT: movzbl %sil, %eax +; APX-NEXT: adcq %rax, %r9 +; APX-NEXT: adcq $0, %r16 +; APX-NEXT: movq 96(%r11), %rcx +; APX-NEXT: imulq %rcx, %r18 +; APX-NEXT: movq %rcx, %rax +; APX-NEXT: mulq %rdi +; APX-NEXT: movq %rax, %r8 +; APX-NEXT: addq %r18, %rdx +; APX-NEXT: movq 104(%r11), %r26 +; APX-NEXT: movq %rdi, %rax +; APX-NEXT: imulq %r26, %rax +; APX-NEXT: addq %rdx, %rax +; APX-NEXT: movq %rax, %r10 +; APX-NEXT: movq 112(%r11), %rax +; APX-NEXT: movq %rax, %rsi +; APX-NEXT: imulq %r17, %rsi +; APX-NEXT: mulq %r19 +; APX-NEXT: movq %rax, %rdi +; APX-NEXT: addq %rsi, %rdx +; APX-NEXT: movq 120(%r11), %r18 +; APX-NEXT: imulq %r19, %r18 +; APX-NEXT: addq %rdx, %r18 +; APX-NEXT: addq %r8, %rdi +; APX-NEXT: adcq %r10, %r18 +; APX-NEXT: movq %r19, %rax +; APX-NEXT: mulq %rcx +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %rsi +; APX-NEXT: movq %r17, %rax +; APX-NEXT: mulq %rcx +; APX-NEXT: movq %rdx, %rcx +; APX-NEXT: movq %rax, %r10 +; APX-NEXT: addq %r8, %r10 +; APX-NEXT: adcq $0, %rcx +; APX-NEXT: movq %r19, %rax +; APX-NEXT: mulq %r26 +; APX-NEXT: movq %rdx, %r8 +; APX-NEXT: movq %rax, %r11 +; APX-NEXT: addq %r10, %r11 +; APX-NEXT: adcq %rcx, %r8 +; APX-NEXT: setb %cl +; APX-NEXT: movq %r17, %rax +; APX-NEXT: mulq %r26 +; APX-NEXT: movq %rdx, %r10 +; APX-NEXT: movq %rax, %r17 +; APX-NEXT: addq %r8, %r17 +; APX-NEXT: movzbl %cl, %eax +; APX-NEXT: adcq %rax, %r10 +; APX-NEXT: addq %rdi, %r17 +; APX-NEXT: adcq %r18, %r10 +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; APX-NEXT: imulq %r15, %rdi +; APX-NEXT: movq %r15, %rax +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; APX-NEXT: mulq %r8 +; APX-NEXT: movq %rax, %rcx +; APX-NEXT: addq %rdi, %rdx +; APX-NEXT: movq %r8, %rax +; APX-NEXT: imulq %r14, %rax +; APX-NEXT: addq %rdx, %rax +; APX-NEXT: movq %rax, %r18 +; APX-NEXT: movq %rbx, %rdi +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r19 # 8-byte Reload +; APX-NEXT: imulq %r19, %rdi +; APX-NEXT: movq %rbx, %rax +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; APX-NEXT: mulq %r8 +; APX-NEXT: movq %rax, %r26 +; APX-NEXT: addq %rdi, %rdx +; APX-NEXT: imulq %r8, 
%r28 +; APX-NEXT: addq %rdx, %r28 +; APX-NEXT: addq %rcx, %r26 +; APX-NEXT: adcq %r18, %r28 +; APX-NEXT: movq %r8, %rax +; APX-NEXT: movq %r8, %rdi +; APX-NEXT: mulq %r15 +; APX-NEXT: movq %rdx, %rcx +; APX-NEXT: movq %rax, %r8 +; APX-NEXT: movq %r19, %rax +; APX-NEXT: mulq %r15 +; APX-NEXT: movq %rdx, %rbx +; APX-NEXT: movq %rax, %r15 +; APX-NEXT: addq %rcx, %r15 +; APX-NEXT: adcq $0, %rbx +; APX-NEXT: movq %rdi, %rax +; APX-NEXT: mulq %r14 +; APX-NEXT: movq %rdx, %rcx +; APX-NEXT: movq %rax, %r18 +; APX-NEXT: addq %r15, %r18 +; APX-NEXT: adcq %rbx, %rcx +; APX-NEXT: setb %dil +; APX-NEXT: movq %r19, %rax +; APX-NEXT: mulq %r14 +; APX-NEXT: addq %rcx, %rax +; APX-NEXT: movzbl %dil, %ecx +; APX-NEXT: adcq %rcx, %rdx +; APX-NEXT: addq %r26, %rax +; APX-NEXT: adcq %r28, %rdx +; APX-NEXT: addq %rsi, %r8 +; APX-NEXT: adcq %r11, %r18 +; APX-NEXT: adcq %r17, %rax +; APX-NEXT: adcq %r10, %rdx +; APX-NEXT: addq %r31, %r8 +; APX-NEXT: adcq %r13, %r18 +; APX-NEXT: adcq %r9, %rax +; APX-NEXT: adcq %r16, %rdx +; APX-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r23 # 8-byte Folded Reload +; APX-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r29 # 8-byte Folded Reload +; APX-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r30 # 8-byte Folded Reload +; APX-NEXT: adcq %r22, %r21 +; APX-NEXT: adcq %r20, %r8 +; APX-NEXT: adcq %r25, %r18 +; APX-NEXT: adcq %r27, %rax +; APX-NEXT: adcq %r24, %rdx +; APX-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r23 # 8-byte Folded Reload +; APX-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r29 # 8-byte Folded Reload +; APX-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r30 # 8-byte Folded Reload +; APX-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r21 # 8-byte Folded Reload +; APX-NEXT: adcq (%rsp), %r8 # 8-byte Folded Reload +; APX-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r18 # 8-byte Folded Reload +; APX-NEXT: adcq %r12, %rax +; APX-NEXT: adcq %rbp, %rdx +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; APX-NEXT: movq %rsi, (%rcx) +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; APX-NEXT: movq %rsi, 8(%rcx) +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; APX-NEXT: movq %rsi, 16(%rcx) +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; APX-NEXT: movq %rsi, 24(%rcx) +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; APX-NEXT: movq %rsi, 32(%rcx) +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; APX-NEXT: movq %rsi, 40(%rcx) +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; APX-NEXT: movq %rsi, 48(%rcx) +; APX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; APX-NEXT: movq %rsi, 56(%rcx) +; APX-NEXT: movq %r23, 64(%rcx) +; APX-NEXT: movq %r29, 72(%rcx) +; APX-NEXT: movq %r30, 80(%rcx) +; APX-NEXT: movq %r21, 88(%rcx) +; APX-NEXT: movq %r8, 96(%rcx) +; APX-NEXT: movq %r18, 104(%rcx) +; APX-NEXT: movq %rax, 112(%rcx) +; APX-NEXT: movq %rdx, 120(%rcx) +; APX-NEXT: addq $104, %rsp +; APX-NEXT: popq %rbx +; APX-NEXT: popq %r12 +; APX-NEXT: popq %r13 +; APX-NEXT: popq %r14 +; APX-NEXT: popq %r15 +; APX-NEXT: popq %rbp +; APX-NEXT: retq + %av = load i1024, ptr %a + %bv = load i1024, ptr %b + %r = mul i1024 %av, %bv + store i1024 %r, ptr %out + ret void +} diff --git a/llvm/test/CodeGen/X86/apx/no-rex2-general.ll b/llvm/test/CodeGen/X86/apx/no-rex2-general.ll new file mode 100644 index 0000000000000..1f92883f41708 --- /dev/null +++ b/llvm/test/CodeGen/X86/apx/no-rex2-general.ll @@ -0,0 +1,81 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_mir_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown -stop-after=x86-isel -mattr=+sse2,+ssse3,+egpr | FileCheck %s --check-prefix=SSE +; RUN: llc < %s -mtriple=x86_64-unknown -stop-after=x86-isel -mattr=+sse2,+ssse3,+egpr,+avx | FileCheck %s --check-prefix=AVX + +define i32 @map0(ptr nocapture noundef readonly %a, i64 noundef %b) { + ; SSE-LABEL: name: map0 + ; SSE: bb.0.entry: + ; SSE-NEXT: liveins: $rdi, $rsi + ; SSE-NEXT: {{ $}} + ; SSE-NEXT: [[COPY:%[0-9]+]]:gr64_nosp = COPY $rsi + ; SSE-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi + ; SSE-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 4, [[COPY]], 0, $noreg :: (load (s32) from %ir.add.ptr) + ; SSE-NEXT: $eax = COPY [[MOV32rm]] + ; SSE-NEXT: RET 0, $eax + ; AVX-LABEL: name: map0 + ; AVX: bb.0.entry: + ; AVX-NEXT: liveins: $rdi, $rsi + ; AVX-NEXT: {{ $}} + ; AVX-NEXT: [[COPY:%[0-9]+]]:gr64_nosp = COPY $rsi + ; AVX-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi + ; AVX-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 4, [[COPY]], 0, $noreg :: (load (s32) from %ir.add.ptr) + ; AVX-NEXT: $eax = COPY [[MOV32rm]] + ; AVX-NEXT: RET 0, $eax +entry: + %add.ptr = getelementptr inbounds i32, ptr %a, i64 %b + %0 = load i32, ptr %add.ptr + ret i32 %0 +} + +define i32 @map1_or_vex(<2 x double> noundef %a) { + ; SSE-LABEL: name: map1_or_vex + ; SSE: bb.0.entry: + ; SSE-NEXT: liveins: $xmm0 + ; SSE-NEXT: {{ $}} + ; SSE-NEXT: [[COPY:%[0-9]+]]:vr128 = COPY $xmm0 + ; SSE-NEXT: [[CVTSD2SIrr_Int:%[0-9]+]]:gr32 = nofpexcept CVTSD2SIrr_Int [[COPY]], implicit $mxcsr + ; SSE-NEXT: $eax = COPY [[CVTSD2SIrr_Int]] + ; SSE-NEXT: RET 0, $eax + ; AVX-LABEL: name: map1_or_vex + ; AVX: bb.0.entry: + ; AVX-NEXT: liveins: $xmm0 + ; AVX-NEXT: {{ $}} + ; AVX-NEXT: [[COPY:%[0-9]+]]:vr128 = COPY $xmm0 + ; AVX-NEXT: [[VCVTSD2SIrr_Int:%[0-9]+]]:gr32_norex2 = nofpexcept VCVTSD2SIrr_Int [[COPY]], implicit $mxcsr + ; AVX-NEXT: $eax = COPY [[VCVTSD2SIrr_Int]] + ; AVX-NEXT: RET 0, $eax +entry: + %0 = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a) + ret i32 %0 +} + +define <2 x i64> @map2_or_vex(ptr nocapture noundef readonly %b, i64 noundef %c) { + ; SSE-LABEL: name: map2_or_vex + ; SSE: bb.0.entry: + ; SSE-NEXT: liveins: $rdi, $rsi + ; SSE-NEXT: {{ $}} + ; SSE-NEXT: [[COPY:%[0-9]+]]:gr64_norex2_nosp = COPY $rsi + ; SSE-NEXT: [[COPY1:%[0-9]+]]:gr64_norex2 = COPY $rdi + ; SSE-NEXT: [[PABSBrm:%[0-9]+]]:vr128 = PABSBrm [[COPY1]], 4, [[COPY]], 0, $noreg :: (load (s128) from %ir.add.ptr) + ; SSE-NEXT: $xmm0 = COPY [[PABSBrm]] + ; SSE-NEXT: RET 0, $xmm0 + ; AVX-LABEL: name: map2_or_vex + ; AVX: bb.0.entry: + ; AVX-NEXT: liveins: $rdi, $rsi + ; AVX-NEXT: {{ $}} + ; AVX-NEXT: [[COPY:%[0-9]+]]:gr64_norex2_nosp = COPY $rsi + ; AVX-NEXT: [[COPY1:%[0-9]+]]:gr64_norex2 = COPY $rdi + ; AVX-NEXT: [[VPABSBrm:%[0-9]+]]:vr128 = VPABSBrm [[COPY1]], 4, [[COPY]], 0, $noreg :: (load (s128) from %ir.add.ptr) + ; AVX-NEXT: $xmm0 = COPY [[VPABSBrm]] + ; AVX-NEXT: RET 0, $xmm0 +entry: + %add.ptr = getelementptr inbounds i32, ptr %b, i64 %c + %a = load <2 x i64>, ptr %add.ptr + %0 = bitcast <2 x i64> %a to <16 x i8> + %elt.abs.i = tail call <16 x i8> @llvm.abs.v16i8(<16 x i8> %0, i1 false) + %1 = bitcast <16 x i8> %elt.abs.i to <2 x i64> + ret <2 x i64> %1 +} + +declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) +declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1 immarg) diff --git a/llvm/test/CodeGen/X86/apx/no-rex2-pseudo-amx.ll b/llvm/test/CodeGen/X86/apx/no-rex2-pseudo-amx.ll new file mode 100644 index 0000000000000..e082becbc441d --- /dev/null +++ 
b/llvm/test/CodeGen/X86/apx/no-rex2-pseudo-amx.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -stop-after=x86-isel -mattr=+amx-tile,+egpr | FileCheck %s
+
+define dso_local void @amx(ptr noundef %data) {
+  ; CHECK-LABEL: name: amx
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT: liveins: $rdi
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64_norex2 = COPY $rdi
+  ; CHECK-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64_norex2_nosp = MOV32ri64 8
+  ; CHECK-NEXT: PTILELOADD 4, [[COPY]], 1, killed [[MOV32ri64_]], 0, $noreg
+  ; CHECK-NEXT: RET 0
+entry:
+  call void @llvm.x86.tileloadd64(i8 4, ptr %data, i64 8)
+  ret void
+}
+
+declare void @llvm.x86.tileloadd64(i8 immarg, ptr, i64)
diff --git a/llvm/test/CodeGen/X86/apx/no-rex2-pseudo-x87.ll b/llvm/test/CodeGen/X86/apx/no-rex2-pseudo-x87.ll
new file mode 100644
index 0000000000000..10ec184516e7e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/no-rex2-pseudo-x87.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -stop-after=x86-isel -mattr=-sse,+egpr | FileCheck %s
+
+define void @x87(ptr %0, ptr %1) {
+  ; CHECK-LABEL: name: x87
+  ; CHECK: bb.0 (%ir-block.2):
+  ; CHECK-NEXT: liveins: $rdi, $rsi
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64_norex2 = COPY $rsi
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_norex2 = COPY $rdi
+  ; CHECK-NEXT: [[LD_Fp32m:%[0-9]+]]:rfp32 = nofpexcept LD_Fp32m [[COPY1]], 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s32) from %ir.0)
+  ; CHECK-NEXT: nofpexcept ST_Fp32m [[COPY]], 1, $noreg, 0, $noreg, killed [[LD_Fp32m]], implicit-def dead $fpsw, implicit $fpcw :: (store (s32) into %ir.1)
+  ; CHECK-NEXT: RET 0
+  %3 = load float, ptr %0
+  store float %3, ptr %1
+  ret void
+}
+
diff --git a/llvm/test/CodeGen/X86/apx/no-rex2-special.ll b/llvm/test/CodeGen/X86/apx/no-rex2-special.ll
new file mode 100644
index 0000000000000..b277949697417
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/no-rex2-special.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -stop-after=x86-isel -mattr=+xsave,+egpr | FileCheck %s
+
+define void @test_xsave(ptr %ptr, i32 %hi, i32 %lo) {
+  ; CHECK-LABEL: name: test_xsave
+  ; CHECK: bb.0 (%ir-block.0):
+  ; CHECK-NEXT: liveins: $rdi, $esi, $edx
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edx
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_norex2 = COPY $rdi
+  ; CHECK-NEXT: $edx = COPY [[COPY1]]
+  ; CHECK-NEXT: $eax = COPY [[COPY]]
+  ; CHECK-NEXT: XSAVE [[COPY2]], 1, $noreg, 0, $noreg, implicit $edx, implicit $eax
+  ; CHECK-NEXT: RET 0
+  call void @llvm.x86.xsave(ptr %ptr, i32 %hi, i32 %lo)
+  ret void;
+}
+declare void @llvm.x86.xsave(ptr, i32, i32)
+
+define void @test_xsave64(ptr %ptr, i32 %hi, i32 %lo) {
+  ; CHECK-LABEL: name: test_xsave64
+  ; CHECK: bb.0 (%ir-block.0):
+  ; CHECK-NEXT: liveins: $rdi, $esi, $edx
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edx
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_norex2 = COPY $rdi
+  ; CHECK-NEXT: $edx = COPY [[COPY1]]
+  ; CHECK-NEXT: $eax = COPY [[COPY]]
+  ; CHECK-NEXT: XSAVE64 [[COPY2]], 1, $noreg, 0, $noreg, implicit $edx, implicit $eax
+  ; CHECK-NEXT: RET 0
+  call void @llvm.x86.xsave64(ptr %ptr, i32 %hi, i32 %lo)
+  ret void;
+}
+declare void @llvm.x86.xsave64(ptr, i32, i32)
+
+define void @test_xrstor(ptr %ptr, i32 %hi, i32 %lo) {
+  ; CHECK-LABEL: name: test_xrstor
+  ; CHECK: bb.0 (%ir-block.0):
+  ; CHECK-NEXT: liveins: $rdi, $esi, $edx
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edx
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_norex2 = COPY $rdi
+  ; CHECK-NEXT: $edx = COPY [[COPY1]]
+  ; CHECK-NEXT: $eax = COPY [[COPY]]
+  ; CHECK-NEXT: XRSTOR [[COPY2]], 1, $noreg, 0, $noreg, implicit $edx, implicit $eax
+  ; CHECK-NEXT: RET 0
+  call void @llvm.x86.xrstor(ptr %ptr, i32 %hi, i32 %lo)
+  ret void;
+}
+declare void @llvm.x86.xrstor(ptr, i32, i32)
+
+define void @test_xrstor64(ptr %ptr, i32 %hi, i32 %lo) {
+  ; CHECK-LABEL: name: test_xrstor64
+  ; CHECK: bb.0 (%ir-block.0):
+  ; CHECK-NEXT: liveins: $rdi, $esi, $edx
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edx
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_norex2 = COPY $rdi
+  ; CHECK-NEXT: $edx = COPY [[COPY1]]
+  ; CHECK-NEXT: $eax = COPY [[COPY]]
+  ; CHECK-NEXT: XRSTOR64 [[COPY2]], 1, $noreg, 0, $noreg, implicit $edx, implicit $eax
+  ; CHECK-NEXT: RET 0
+  call void @llvm.x86.xrstor64(ptr %ptr, i32 %hi, i32 %lo)
+  ret void;
+}
+declare void @llvm.x86.xrstor64(ptr, i32, i32)
diff --git a/llvm/test/CodeGen/X86/ipra-reg-usage.ll b/llvm/test/CodeGen/X86/ipra-reg-usage.ll
index 4d0c94125c761..219b33b4297ae 100644
--- a/llvm/test/CodeGen/X86/ipra-reg-usage.ll
+++ b/llvm/test/CodeGen/X86/ipra-reg-usage.ll
@@ -3,7 +3,7 @@
 target triple = "x86_64-unknown-unknown"
 declare void @bar1()
 define preserve_allcc void @foo()#0 {
-; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $esp $fpcw $fpsw $fs $fs_base $gs $gs_base $hip $hsp $ip $mxcsr $rflags $rip $riz $rsp $sp $sph $spl $ss $ssp $_eflags $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $r11b $r11bh $r11d $r11w $r11wh $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $k0_k1 $k2_k3 $k4_k5 $k6_k7 $tmmcfg $tmm0 $tmm1 $tmm2 $tmm3 $tmm4 $tmm5 $tmm6 $tmm7
+; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $esp $fpcw $fpsw $fs $fs_base $gs $gs_base $hip $hsp $ip $mxcsr $rflags $rip $riz $rsp $sp $sph $spl $ss $ssp $_eflags $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $r11b $r11bh $r11d $r11w $r11wh $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $k0_k1 $k2_k3 $k4_k5 $k6_k7 $tmmcfg $tmm0 $tmm1 $tmm2 $tmm3 $tmm4 $tmm5 $tmm6 $tmm7 $r16 $r17 $r18 $r19 $r20 $r21 $r22 $r23 $r24 $r25 $r26 $r27 $r28 $r29 $r30 $r31 $r16b $r17b $r18b $r19b $r20b $r21b $r22b $r23b $r24b $r25b $r26b $r27b $r28b $r29b $r30b $r31b $r16bh $r17bh $r18bh $r19bh $r20bh $r21bh $r22bh $r23bh $r24bh $r25bh $r26bh $r27bh $r28bh $r29bh $r30bh $r31bh $r16d $r17d $r18d $r19d $r20d $r21d $r22d $r23d $r24d $r25d $r26d $r27d $r28d $r29d $r30d $r31d $r16w $r17w $r18w $r19w $r20w $r21w $r22w $r23w $r24w $r25w $r26w $r27w $r28w $r29w $r30w $r31w $r16wh $r17wh $r18wh $r19wh $r20wh $r21wh $r22wh $r23wh $r24wh $r25wh $r26wh $r27wh $r28wh $r29wh $r30wh $r31wh
 call void @bar1()
 call void @bar2()
 ret void
diff --git a/llvm/test/CodeGen/X86/musttail-varargs.ll b/llvm/test/CodeGen/X86/musttail-varargs.ll
index ce672a70b1f91..d3ded0b2a03d8 100644
--- a/llvm/test/CodeGen/X86/musttail-varargs.ll
+++ b/llvm/test/CodeGen/X86/musttail-varargs.ll
@@ -37,6 +37,7 @@ define void @f_thunk(ptr %this, ...) {
 ; LINUX-NEXT: .cfi_offset %r14, -32
 ; LINUX-NEXT: .cfi_offset %r15, -24
 ; LINUX-NEXT: .cfi_offset %rbp, -16
+; LINUX-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
 ; LINUX-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; LINUX-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; LINUX-NEXT: movaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -45,12 +46,11 @@ define void @f_thunk(ptr %this, ...) {
 ; LINUX-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; LINUX-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; LINUX-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; LINUX-NEXT: movl %eax, %ebp
-; LINUX-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; LINUX-NEXT: movq %r8, %r14
-; LINUX-NEXT: movq %rcx, %r15
-; LINUX-NEXT: movq %rdx, %r12
-; LINUX-NEXT: movq %rsi, %r13
+; LINUX-NEXT: movq %r9, %r14
+; LINUX-NEXT: movq %r8, %r15
+; LINUX-NEXT: movq %rcx, %r12
+; LINUX-NEXT: movq %rdx, %r13
+; LINUX-NEXT: movq %rsi, %rbp
 ; LINUX-NEXT: movq %rdi, %rbx
 ; LINUX-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
 ; LINUX-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
@@ -78,12 +78,12 @@ define void @f_thunk(ptr %this, ...) {
 ; LINUX-NEXT: callq get_f@PLT
 ; LINUX-NEXT: movq %rax, %r11
 ; LINUX-NEXT: movq %rbx, %rdi
-; LINUX-NEXT: movq %r13, %rsi
-; LINUX-NEXT: movq %r12, %rdx
-; LINUX-NEXT: movq %r15, %rcx
-; LINUX-NEXT: movq %r14, %r8
-; LINUX-NEXT: movl %ebp, %eax
-; LINUX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; LINUX-NEXT: movq %rbp, %rsi
+; LINUX-NEXT: movq %r13, %rdx
+; LINUX-NEXT: movq %r12, %rcx
+; LINUX-NEXT: movq %r15, %r8
+; LINUX-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; LINUX-NEXT: movq %r14, %r9
 ; LINUX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
 ; LINUX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
 ; LINUX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
@@ -130,6 +130,7 @@ define void @f_thunk(ptr %this, ...) {
 ; LINUX-X32-NEXT: .cfi_offset %r14, -32
 ; LINUX-X32-NEXT: .cfi_offset %r15, -24
 ; LINUX-X32-NEXT: .cfi_offset %rbp, -16
+; LINUX-X32-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
 ; LINUX-X32-NEXT: movaps %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; LINUX-X32-NEXT: movaps %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; LINUX-X32-NEXT: movaps %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
@@ -138,12 +139,11 @@ define void @f_thunk(ptr %this, ...) {
 ; LINUX-X32-NEXT: movaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; LINUX-X32-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; LINUX-X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; LINUX-X32-NEXT: movl %eax, %ebp
-; LINUX-X32-NEXT: movq %r9, {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Spill
-; LINUX-X32-NEXT: movq %r8, %r14
-; LINUX-X32-NEXT: movq %rcx, %r15
-; LINUX-X32-NEXT: movq %rdx, %r12
-; LINUX-X32-NEXT: movq %rsi, %r13
+; LINUX-X32-NEXT: movq %r9, %r14
+; LINUX-X32-NEXT: movq %r8, %r15
+; LINUX-X32-NEXT: movq %rcx, %r12
+; LINUX-X32-NEXT: movq %rdx, %r13
+; LINUX-X32-NEXT: movq %rsi, %rbp
 ; LINUX-X32-NEXT: movq %rdi, %rbx
 ; LINUX-X32-NEXT: movq %rsi, {{[0-9]+}}(%esp)
 ; LINUX-X32-NEXT: movq %rdx, {{[0-9]+}}(%esp)
@@ -171,12 +171,12 @@ define void @f_thunk(ptr %this, ...) {
 ; LINUX-X32-NEXT: callq get_f@PLT
 ; LINUX-X32-NEXT: movl %eax, %r11d
 ; LINUX-X32-NEXT: movq %rbx, %rdi
-; LINUX-X32-NEXT: movq %r13, %rsi
-; LINUX-X32-NEXT: movq %r12, %rdx
-; LINUX-X32-NEXT: movq %r15, %rcx
-; LINUX-X32-NEXT: movq %r14, %r8
-; LINUX-X32-NEXT: movl %ebp, %eax
-; LINUX-X32-NEXT: movq {{[-0-9]+}}(%e{{[sb]}}p), %r9 # 8-byte Reload
+; LINUX-X32-NEXT: movq %rbp, %rsi
+; LINUX-X32-NEXT: movq %r13, %rdx
+; LINUX-X32-NEXT: movq %r12, %rcx
+; LINUX-X32-NEXT: movq %r15, %r8
+; LINUX-X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; LINUX-X32-NEXT: movq %r14, %r9
 ; LINUX-X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
 ; LINUX-X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
 ; LINUX-X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload
diff --git a/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir b/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir
index 4efddd57cf5b0..e2e963d93d7a4 100644
--- a/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir
+++ b/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir
@@ -351,7 +351,7 @@ body: |
 ; CHECK-NEXT: CMP64rr [[NOT64r2]], [[COPY6]], implicit-def $eflags
 ; CHECK-NEXT: undef [[MOV32ri2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32ri 0
 ; CHECK-NEXT: [[CMOV64rr:%[0-9]+]]:gr64 = CMOV64rr [[CMOV64rr]], [[MOV32ri2]], 4, implicit killed $eflags
-; CHECK-NEXT: INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4521993 /* reguse:GR64 */, [[MOV32ri2]], 4521993 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
+; CHECK-NEXT: INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4784137 /* reguse:GR64 */, [[MOV32ri2]], 4784137 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
 ; CHECK-NEXT: LCMPXCHG32 undef %67:gr64, 1, $noreg, 0, $noreg, [[COPY5]], implicit-def dead $eax, implicit-def dead $eflags, implicit undef $eax :: (load store acquire monotonic (s32) on `ptr addrspace(1) undef`, addrspace 1)
 ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
 ; CHECK-NEXT: $rdi = COPY [[COPY4]]
@@ -471,7 +471,7 @@ body: |
 %63:gr64 = NOT64r %63
 CMP64rr %63, %31, implicit-def $eflags
 %63:gr64 = CMOV64rr %63, %53, 4, implicit killed $eflags
-INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4521993 /* reguse:GR64 */, %53, 4521993 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
+INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4784137 /* reguse:GR64 */, %53, 4784137 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
 LCMPXCHG32 undef %67:gr64, 1, $noreg, 0, $noreg, %65, implicit-def dead $eax, implicit-def dead $eflags, implicit undef $eax :: (load store acquire monotonic (s32) on `ptr addrspace(1) undef`, addrspace 1)
 ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
 $rdi = COPY %64
diff --git a/llvm/test/MC/AsmParser/seh-directive-errors.s b/llvm/test/MC/AsmParser/seh-directive-errors.s
index 8f4cc211d1c02..6d1486916caf3 100644
--- a/llvm/test/MC/AsmParser/seh-directive-errors.s
+++ b/llvm/test/MC/AsmParser/seh-directive-errors.s
@@ -105,7 +105,7 @@ h: # @h
 i:
 	.seh_proc i
 	pushq %rbp
-	.seh_pushreg 17
+	.seh_pushreg 32
 # CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: incorrect register number for use with this directive
 	pushq %rbx
 	.seh_pushreg %xmm0
diff --git a/llvm/test/MC/X86/apx/cfi-reg.s b/llvm/test/MC/X86/apx/cfi-reg.s
new file mode 100644
index 0000000000000..ee1e76bd20007
--- /dev/null
+++ b/llvm/test/MC/X86/apx/cfi-reg.s
@@ -0,0 +1,41 @@
+// RUN: llvm-mc -triple x86_64-pc-linux-gnu %s -o - | FileCheck %s
+
+f:
+	.cfi_startproc
+	.cfi_offset 130, -120
+	.cfi_offset 131, -112
+	.cfi_offset 132, -104
+	.cfi_offset 133, -96
+	.cfi_offset 134, -88
+	.cfi_offset 135, -80
+	.cfi_offset 136, -72
+	.cfi_offset 137, -64
+	.cfi_offset 138, -56
+	.cfi_offset 139, -48
+	.cfi_offset 140, -40
+	.cfi_offset 141, -32
+	.cfi_offset 142, -24
+	.cfi_offset 143, -16
+	.cfi_offset 144, -8
+	.cfi_offset 145, 0
+	.cfi_endproc
+
+// CHECK: f:
+// CHECK-NEXT: .cfi_startproc
+// CHECK-NEXT: .cfi_offset %r16, -120
+// CHECK-NEXT: .cfi_offset %r17, -112
+// CHECK-NEXT: .cfi_offset %r18, -104
+// CHECK-NEXT: .cfi_offset %r19, -96
+// CHECK-NEXT: .cfi_offset %r20, -88
+// CHECK-NEXT: .cfi_offset %r21, -80
+// CHECK-NEXT: .cfi_offset %r22, -72
+// CHECK-NEXT: .cfi_offset %r23, -64
+// CHECK-NEXT: .cfi_offset %r24, -56
+// CHECK-NEXT: .cfi_offset %r25, -48
+// CHECK-NEXT: .cfi_offset %r26, -40
+// CHECK-NEXT: .cfi_offset %r27, -32
+// CHECK-NEXT: .cfi_offset %r28, -24
+// CHECK-NEXT: .cfi_offset %r29, -16
+// CHECK-NEXT: .cfi_offset %r30, -8
+// CHECK-NEXT: .cfi_offset %r31, 0
+// CHECK-NEXT: .cfi_endproc