Skip to content

Commit

Permalink
X86: Don't emit SAHF/LAHF for 64-bit targets unless explicitly supported
Browse files Browse the repository at this point in the history
These instructions are not supported by all CPUs in 64-bit mode. Emitting them
causes Chromium to crash on start-up for users with such chips.

(GCC puts these instructions behind -msahf on 64-bit for the same reason.)

This patch adds FeatureLAHFSAHF, enables it by default for 32-bit targets
and modern CPUs, and changes X86InstrInfo::copyPhysReg back to the lowering
from before r244503 when the instructions are not available.

Differential Revision: http://reviews.llvm.org/D15240

llvm-svn: 254793
  • Loading branch information
zmodem committed Dec 4, 2015
1 parent 3e9e7d2 commit 5000ce8
Show file tree
Hide file tree
Showing 8 changed files with 136 additions and 49 deletions.
58 changes: 39 additions & 19 deletions llvm/lib/Target/X86/X86.td
Expand Up @@ -182,6 +182,8 @@ def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
"Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
"Support RDSEED instruction">;
// LAHF/SAHF are not supported by all CPUs in 64-bit mode, so their
// availability is modeled as an explicit subtarget feature. The feature
// string is "sahf" (GCC likewise gates these instructions behind -msahf on
// 64-bit targets).
def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
"Support LAHF and SAHF instructions">;
def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true",
"Support MPX instructions">;
def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
Expand Down Expand Up @@ -273,15 +275,17 @@ def : ProcessorModel<"core2", SandyBridgeModel, [
FeatureSSSE3,
FeatureFXSR,
FeatureCMPXCHG16B,
FeatureSlowBTMem
FeatureSlowBTMem,
FeatureLAHFSAHF
]>;
def : ProcessorModel<"penryn", SandyBridgeModel, [
FeatureSlowUAMem16,
FeatureMMX,
FeatureSSE41,
FeatureFXSR,
FeatureCMPXCHG16B,
FeatureSlowBTMem
FeatureSlowBTMem,
FeatureLAHFSAHF
]>;

// Atom CPUs.
Expand All @@ -299,7 +303,8 @@ class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [
FeatureSlowDivide64,
FeatureCallRegIndirect,
FeatureLEAUsesAG,
FeaturePadShortFunctions
FeaturePadShortFunctions,
FeatureLAHFSAHF
]>;
def : BonnellProc<"bonnell">;
def : BonnellProc<"atom">; // Pin the generic name to the baseline.
Expand All @@ -319,7 +324,8 @@ class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [
FeaturePRFCHW,
FeatureSlowLEA,
FeatureSlowIncDec,
FeatureSlowBTMem
FeatureSlowBTMem,
FeatureLAHFSAHF
]>;
def : SilvermontProc<"silvermont">;
def : SilvermontProc<"slm">; // Legacy alias.
Expand All @@ -331,7 +337,8 @@ class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureFXSR,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
FeaturePOPCNT
FeaturePOPCNT,
FeatureLAHFSAHF
]>;
def : NehalemProc<"nehalem">;
def : NehalemProc<"corei7">;
Expand All @@ -346,7 +353,8 @@ class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureSlowBTMem,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL
FeaturePCLMUL,
FeatureLAHFSAHF
]>;
def : WestmereProc<"westmere">;

Expand All @@ -363,7 +371,8 @@ class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureAES,
FeaturePCLMUL,
FeatureXSAVE,
FeatureXSAVEOPT
FeatureXSAVEOPT,
FeatureLAHFSAHF
]>;
def : SandyBridgeProc<"sandybridge">;
def : SandyBridgeProc<"corei7-avx">; // Legacy alias.
Expand All @@ -382,7 +391,8 @@ class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureXSAVEOPT,
FeatureRDRAND,
FeatureF16C,
FeatureFSGSBase
FeatureFSGSBase,
FeatureLAHFSAHF
]>;
def : IvyBridgeProc<"ivybridge">;
def : IvyBridgeProc<"core-avx-i">; // Legacy alias.
Expand All @@ -408,7 +418,8 @@ class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureFMA,
FeatureRTM,
FeatureHLE,
FeatureSlowIncDec
FeatureSlowIncDec,
FeatureLAHFSAHF
]>;
def : HaswellProc<"haswell">;
def : HaswellProc<"core-avx2">; // Legacy alias.
Expand Down Expand Up @@ -436,7 +447,8 @@ class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureHLE,
FeatureADX,
FeatureRDSEED,
FeatureSlowIncDec
FeatureSlowIncDec,
FeatureLAHFSAHF
]>;
def : BroadwellProc<"broadwell">;

Expand Down Expand Up @@ -465,7 +477,8 @@ class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureRTM,
FeatureHLE,
FeatureSlowIncDec,
FeatureMPX
FeatureMPX,
FeatureLAHFSAHF
]>;
def : KnightsLandingProc<"knl">;

Expand Down Expand Up @@ -500,7 +513,8 @@ class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureSlowIncDec,
FeatureMPX,
FeatureXSAVEC,
FeatureXSAVES
FeatureXSAVES,
FeatureLAHFSAHF
]>;
def : SkylakeProc<"skylake">;
def : SkylakeProc<"skx">; // Legacy alias.
Expand Down Expand Up @@ -547,7 +561,7 @@ def : Proc<"amdfam10", [FeatureSSE4A, Feature3DNowA, FeatureFXSR,
FeatureSlowBTMem, FeatureSlowSHLD]>;
def : Proc<"barcelona", [FeatureSSE4A, Feature3DNowA, FeatureFXSR,
FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT,
FeatureSlowBTMem, FeatureSlowSHLD]>;
FeatureSlowBTMem, FeatureSlowSHLD, FeatureLAHFSAHF]>;

// Bobcat
def : Proc<"btver1", [
Expand All @@ -560,7 +574,8 @@ def : Proc<"btver1", [
FeatureLZCNT,
FeaturePOPCNT,
FeatureXSAVE,
FeatureSlowSHLD
FeatureSlowSHLD,
FeatureLAHFSAHF
]>;

// Jaguar
Expand All @@ -580,7 +595,8 @@ def : ProcessorModel<"btver2", BtVer2Model, [
FeaturePOPCNT,
FeatureXSAVE,
FeatureXSAVEOPT,
FeatureSlowSHLD
FeatureSlowSHLD,
FeatureLAHFSAHF
]>;

// Bulldozer
Expand All @@ -598,7 +614,8 @@ def : Proc<"bdver1", [
FeatureLZCNT,
FeaturePOPCNT,
FeatureXSAVE,
FeatureSlowSHLD
FeatureSlowSHLD,
FeatureLAHFSAHF
]>;
// Piledriver
def : Proc<"bdver2", [
Expand All @@ -619,7 +636,8 @@ def : Proc<"bdver2", [
FeatureBMI,
FeatureTBM,
FeatureFMA,
FeatureSlowSHLD
FeatureSlowSHLD,
FeatureLAHFSAHF
]>;

// Steamroller
Expand All @@ -643,7 +661,8 @@ def : Proc<"bdver3", [
FeatureFMA,
FeatureXSAVEOPT,
FeatureSlowSHLD,
FeatureFSGSBase
FeatureFSGSBase,
FeatureLAHFSAHF
]>;

// Excavator
Expand All @@ -666,7 +685,8 @@ def : Proc<"bdver4", [
FeatureTBM,
FeatureFMA,
FeatureXSAVEOPT,
FeatureFSGSBase
FeatureFSGSBase,
FeatureLAHFSAHF
]>;

def : Proc<"geode", [FeatureSlowUAMem16, Feature3DNowA]>;
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -13930,6 +13930,9 @@ SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
SDValue Srl = DAG.getNode(ISD::SRL, dl, MVT::i16, FNStSW,
DAG.getConstant(8, dl, MVT::i8));
SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl);

// Some 64-bit targets lack SAHF support, but they do support FCOMI.
assert(Subtarget->hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?");
return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
}

Expand Down
29 changes: 25 additions & 4 deletions llvm/lib/Target/X86/X86InstrInfo.cpp
Expand Up @@ -4385,7 +4385,32 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
int Reg = FromEFLAGS ? DestReg : SrcReg;
bool is32 = X86::GR32RegClass.contains(Reg);
bool is64 = X86::GR64RegClass.contains(Reg);

if ((FromEFLAGS || ToEFLAGS) && (is32 || is64)) {
int Mov = is64 ? X86::MOV64rr : X86::MOV32rr;
int Push = is64 ? X86::PUSH64r : X86::PUSH32r;
int PushF = is64 ? X86::PUSHF64 : X86::PUSHF32;
int Pop = is64 ? X86::POP64r : X86::POP32r;
int PopF = is64 ? X86::POPF64 : X86::POPF32;
int AX = is64 ? X86::RAX : X86::EAX;

if (!Subtarget.hasLAHFSAHF()) {
// Fallback for targets without LAHF/SAHF: copy EFLAGS through the stack
// using PUSHF/POP (reading flags) or PUSH/POPF (writing flags). This
// restores the lowering that was used before r244503.
assert(is64 && "Not having LAHF/SAHF only happens on 64-bit.");
// Moving EFLAGS to / from another register requires a push and a pop.
// Notice that we have to adjust the stack if we don't want to clobber the
// first frame index. See X86FrameLowering.cpp - clobbersTheStack.
if (FromEFLAGS) {
// DestReg = EFLAGS: push the flags, then pop them into the destination.
BuildMI(MBB, MI, DL, get(PushF));
BuildMI(MBB, MI, DL, get(Pop), DestReg);
}
if (ToEFLAGS) {
// EFLAGS = SrcReg: push the source register, then pop it into the flags.
BuildMI(MBB, MI, DL, get(Push))
.addReg(SrcReg, getKillRegState(KillSrc));
BuildMI(MBB, MI, DL, get(PopF));
}
// Done; the LAHF/SAHF-based sequence that follows is not used on this path.
return;
}

// The flags need to be saved, but saving EFLAGS with PUSHF/POPF is
// inefficient. Instead:
// - Save the overflow flag OF into AL using SETO, and restore it using a
Expand All @@ -4407,10 +4432,6 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Notice that we have to adjust the stack if we don't want to clobber the
// first frame index. See X86FrameLowering.cpp - clobbersTheStack.

int Mov = is64 ? X86::MOV64rr : X86::MOV32rr;
int Push = is64 ? X86::PUSH64r : X86::PUSH32r;
int Pop = is64 ? X86::POP64r : X86::POP32r;
int AX = is64 ? X86::RAX : X86::EAX;

bool AXDead = (Reg == AX);
// FIXME: The above could figure out that AX is dead in more cases with:
Expand Down
7 changes: 5 additions & 2 deletions llvm/lib/Target/X86/X86InstrInfo.td
Expand Up @@ -799,6 +799,7 @@ def HasSHA : Predicate<"Subtarget->hasSHA()">;
def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def HasMPX : Predicate<"Subtarget->hasMPX()">;
Expand Down Expand Up @@ -1502,10 +1503,12 @@ def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
let SchedRW = [WriteALU] in {
let Defs = [EFLAGS], Uses = [AH] in
def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf",
[(set EFLAGS, (X86sahf AH))], IIC_AHF>;
[(set EFLAGS, (X86sahf AH))], IIC_AHF>,
Requires<[HasLAHFSAHF]>;
let Defs = [AH], Uses = [EFLAGS], hasSideEffects = 0 in
def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", [],
IIC_AHF>; // AH = flags
IIC_AHF>, // AH = flags
Requires<[HasLAHFSAHF]>;
} // SchedRW

//===----------------------------------------------------------------------===//
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Target/X86/X86Subtarget.cpp
Expand Up @@ -189,6 +189,15 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
FullFS = "+64bit,+sse2";
}

// LAHF/SAHF are always supported in non-64-bit mode, so force the "sahf"
// feature on there by prepending "+sahf" to the feature string before it is
// parsed. The comma separator is only added when there are other features.
// NOTE(review): presumably prepending (rather than appending) lets an
// explicit "-sahf" later in the string still win — confirm against the
// feature-string parser.
if (!In64BitMode) {
if (!FullFS.empty())
FullFS = "+sahf," + FullFS;
else
FullFS = "+sahf";
}


// Parse features string and set the CPU.
ParseSubtargetFeatures(CPUName, FullFS);

Expand Down Expand Up @@ -264,6 +273,7 @@ void X86Subtarget::initializeEnvironment() {
HasSHA = false;
HasPRFCHW = false;
HasRDSEED = false;
HasLAHFSAHF = false;
HasMPX = false;
IsBTMemSlow = false;
IsSHLDSlow = false;
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/X86/X86Subtarget.h
Expand Up @@ -152,6 +152,9 @@ class X86Subtarget final : public X86GenSubtargetInfo {
/// Processor has RDSEED instructions.
bool HasRDSEED;

/// Processor has LAHF/SAHF instructions.
bool HasLAHFSAHF;

/// True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;

Expand Down Expand Up @@ -374,6 +377,7 @@ class X86Subtarget final : public X86GenSubtargetInfo {
bool hasSHA() const { return HasSHA; }
bool hasPRFCHW() const { return HasPRFCHW; }
bool hasRDSEED() const { return HasRDSEED; }
bool hasLAHFSAHF() const { return HasLAHFSAHF; }
bool isBTMemSlow() const { return IsBTMemSlow; }
bool isSHLDSlow() const { return IsSHLDSlow; }
bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
Expand Down

0 comments on commit 5000ce8

Please sign in to comment.