@@ -41,7 +41,6 @@ using namespace llvm;
namespace {
class AArch64FastISel : public FastISel {
class Address {
public:
typedef enum {
@@ -51,17 +50,23 @@ class AArch64FastISel : public FastISel {
private:
BaseKind Kind;
AArch64_AM::ShiftExtendType ExtType;
union {
unsigned Reg;
int FI;
} Base;
unsigned OffsetReg;
unsigned Shift;
int64_t Offset;
const GlobalValue *GV;
public:
Address () : Kind(RegBase), Offset(0 ), GV(nullptr ) { Base.Reg = 0 ; }
Address () : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
OffsetReg (0 ), Shift(0 ), Offset(0 ), GV(nullptr ) { Base.Reg = 0 ; }
void setKind (BaseKind K) { Kind = K; }
BaseKind getKind () const { return Kind; }
void setExtendType (AArch64_AM::ShiftExtendType E) { ExtType = E; }
AArch64_AM::ShiftExtendType getExtendType () const { return ExtType; }
bool isRegBase () const { return Kind == RegBase; }
bool isFIBase () const { return Kind == FrameIndexBase; }
void setReg (unsigned Reg) {
@@ -72,6 +77,14 @@ class AArch64FastISel : public FastISel {
assert (isRegBase () && " Invalid base register access!" );
return Base.Reg ;
}
void setOffsetReg (unsigned Reg) {
assert (isRegBase () && " Invalid offset register access!" );
OffsetReg = Reg;
}
unsigned getOffsetReg () const {
assert (isRegBase () && " Invalid offset register access!" );
return OffsetReg;
}
void setFI (unsigned FI) {
assert (isFIBase () && "Invalid base frame index access!");
Base.FI = FI;
@@ -82,11 +95,11 @@ class AArch64FastISel : public FastISel {
}
void setOffset (int64_t O) { Offset = O; }
int64_t getOffset () { return Offset; }
void setShift (unsigned S) { Shift = S; }
unsigned getShift () { return Shift; }
void setGlobalValue (const GlobalValue *G) { GV = G; }
const GlobalValue *getGlobalValue () { return GV; }
bool isValid () { return isFIBase () || (isRegBase () && getReg () != 0 ); }
};
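// Note on the fields above: together they describe the AArch64 load/store
// addressing forms FastISel can now emit. A minimal sketch (register names
// illustrative):
//   base only:        ldr x0, [x1]              Base.Reg=x1, Offset=0
//   base + immediate: ldr x0, [x1, #16]         Base.Reg=x1, Offset=16
//   base + register:  ldr x0, [x1, x2, lsl #3]  Base.Reg=x1, OffsetReg=x2,
//                                               Shift=3, ExtType=LSL
//   base + extended:  ldr w0, [x1, w2, sxtw #2] OffsetReg=w2, Shift=2,
//                                               ExtType=SXTW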
/// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
@@ -121,13 +134,12 @@ class AArch64FastISel : public FastISel {
// Utility helper routines.
bool isTypeLegal (Type *Ty, MVT &VT);
bool isLoadStoreTypeLegal (Type *Ty, MVT &VT);
bool ComputeAddress (const Value *Obj, Address &Addr);
bool ComputeAddress (const Value *Obj, Address &Addr, Type *Ty = nullptr );
bool ComputeCallAddress (const Value *V, Address &Addr);
bool SimplifyAddress (Address &Addr, MVT VT, int64_t ScaleFactor,
bool UseUnscaled);
bool SimplifyAddress (Address &Addr, MVT VT);
void AddLoadStoreOperands (Address &Addr, const MachineInstrBuilder &MIB,
unsigned Flags, MachineMemOperand *MMO ,
bool UseUnscaled );
unsigned Flags, unsigned ScaleFactor ,
MachineMemOperand *MMO );
bool IsMemCpySmall (uint64_t Len, unsigned Alignment);
bool TryEmitSmallMemCpy (Address Dest, Address Src, uint64_t Len,
unsigned Alignment);
@@ -137,9 +149,9 @@ class AArch64FastISel : public FastISel {
// Emit functions.
bool EmitCmp (Value *Src1Value, Value *Src2Value, bool isZExt);
bool EmitLoad (MVT VT, unsigned &ResultReg, Address Addr,
MachineMemOperand *MMO = nullptr , bool UseUnscaled = false );
MachineMemOperand *MMO = nullptr );
bool EmitStore (MVT VT, unsigned SrcReg, Address Addr,
MachineMemOperand *MMO = nullptr , bool UseUnscaled = false );
MachineMemOperand *MMO = nullptr );
unsigned EmitIntExt (MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
unsigned Emiti1Ext (unsigned SrcReg, MVT DestVT, bool isZExt);
unsigned Emit_MUL_rr (MVT RetVT, unsigned Op0, bool Op0IsKill,
@@ -337,7 +349,8 @@ unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) {
}
// Computes the address to get to an object.
bool AArch64FastISel::ComputeAddress (const Value *Obj, Address &Addr) {
bool AArch64FastISel::ComputeAddress (const Value *Obj, Address &Addr, Type *Ty)
{
const User *U = nullptr ;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
@@ -364,18 +377,18 @@ bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
break ;
case Instruction::BitCast: {
// Look through bitcasts.
return ComputeAddress (U->getOperand (0 ), Addr);
return ComputeAddress (U->getOperand (0 ), Addr, Ty );
}
case Instruction::IntToPtr: {
// Look past no-op inttoptrs.
if (TLI.getValueType (U->getOperand (0 )->getType ()) == TLI.getPointerTy ())
return ComputeAddress (U->getOperand (0 ), Addr);
return ComputeAddress (U->getOperand (0 ), Addr, Ty );
break ;
}
case Instruction::PtrToInt: {
// Look past no-op ptrtoints.
if (TLI.getValueType (U->getType ()) == TLI.getPointerTy ())
return ComputeAddress (U->getOperand (0 ), Addr);
return ComputeAddress (U->getOperand (0 ), Addr, Ty );
break ;
}
case Instruction::GetElementPtr: {
@@ -417,7 +430,7 @@ bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
// Try to grab the base operand now.
Addr.setOffset (TmpOffset);
if (ComputeAddress (U->getOperand (0 ), Addr))
if (ComputeAddress (U->getOperand (0 ), Addr, Ty ))
return true ;
// We failed, restore everything and try the other options.
@@ -437,19 +450,86 @@ bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
}
break ;
}
case Instruction::Add:
case Instruction::Add: {
// Adds of constants are common and easy enough.
if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand (1 ))) {
const Value *LHS = U->getOperand (0 );
const Value *RHS = U->getOperand (1 );
if (isa<ConstantInt>(LHS))
std::swap (LHS, RHS);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
Addr.setOffset (Addr.getOffset () + (uint64_t )CI->getSExtValue ());
return ComputeAddress (U->getOperand (0 ), Addr);
return ComputeAddress (LHS, Addr, Ty);
}
Address Backup = Addr;
if (ComputeAddress (LHS, Addr, Ty) && ComputeAddress (RHS, Addr, Ty))
return true ;
Addr = Backup;
break ;
}
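// Example for the Add case above (value names illustrative): for
//   %a = add i64 %base, 24
// the constant is folded into Addr.Offset and ComputeAddress recurses on
// %base; for "%a = add i64 %base, %idx" both operands go through
// ComputeAddress, so one ends up as the base register and the other as the
// offset register.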
case Instruction::Shl:
if (Addr.getOffsetReg ())
break ;
if (const auto *CI = dyn_cast<ConstantInt>(U->getOperand (1 ))) {
unsigned Val = CI->getZExtValue ();
if (Val < 1 || Val > 3 )
break ;
uint64_t NumBytes = 0 ;
if (Ty && Ty->isSized ()) {
uint64_t NumBits = DL.getTypeSizeInBits (Ty);
NumBytes = NumBits / 8 ;
if (!isPowerOf2_64 (NumBits))
NumBytes = 0 ;
}
if (NumBytes != (1 << Val))
break ;
Addr.setShift (Val);
Addr.setExtendType (AArch64_AM::LSL);
if (const auto *I = dyn_cast<Instruction>(U->getOperand (0 )))
if (FuncInfo.MBBMap [I->getParent ()] == FuncInfo.MBB )
U = I;
if (const auto *ZE = dyn_cast<ZExtInst>(U))
if (ZE->getOperand (0 )->getType ()->isIntegerTy (32 ))
Addr.setExtendType (AArch64_AM::UXTW);
if (const auto *SE = dyn_cast<SExtInst>(U))
if (SE->getOperand (0 )->getType ()->isIntegerTy (32 ))
Addr.setExtendType (AArch64_AM::SXTW);
unsigned Reg = getRegForValue (U->getOperand (0 ));
if (!Reg)
return false ;
Addr.setOffsetReg (Reg);
return true ;
}
break ;
}
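// Example for the Shl case above: an address computed as
//   %s = shl i64 %idx, 3
//   %a = add i64 %base, %s
//   %p = inttoptr i64 %a to i64*
// with an 8-byte access type (8 == 1 << 3) folds the shift into the addressing
// mode, e.g. "ldr x0, [x_base, x_idx, lsl #3]". If %idx is a zext/sext from
// i32 in the same block, UXTW/SXTW is recorded instead of LSL so the extend is
// folded as well. Value and register names are illustrative.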
// Try to get this in a register if nothing else has worked.
if (!Addr.isValid ())
Addr.setReg (getRegForValue (Obj));
return Addr.isValid ();
if (Addr.getReg ()) {
if (!Addr.getOffsetReg ()) {
unsigned Reg = getRegForValue (Obj);
if (!Reg)
return false ;
Addr.setOffsetReg (Reg);
return true ;
}
return false ;
}
unsigned Reg = getRegForValue (Obj);
if (!Reg)
return false ;
Addr.setReg (Reg);
return true ;
}
bool AArch64FastISel::ComputeCallAddress (const Value *V, Address &Addr) {
@@ -531,50 +611,80 @@ bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
return false ;
}
bool AArch64FastISel::SimplifyAddress (Address &Addr, MVT VT,
int64_t ScaleFactor, bool UseUnscaled) {
bool needsLowering = false ;
int64_t Offset = Addr.getOffset ();
bool AArch64FastISel::SimplifyAddress (Address &Addr, MVT VT) {
unsigned ScaleFactor;
switch (VT.SimpleTy ) {
default :
return false ;
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
case MVT::i64:
case MVT::f32:
case MVT::f64:
if (!UseUnscaled)
// Using scaled, 12-bit, unsigned immediate offsets.
needsLowering = ((Offset & 0xfff ) != Offset);
else
// Using unscaled, 9-bit, signed immediate offsets.
needsLowering = (Offset > 256 || Offset < -256 );
break ;
default : return false ;
case MVT::i1: // fall-through
case MVT::i8: ScaleFactor = 1 ; break ;
case MVT::i16: ScaleFactor = 2 ; break ;
case MVT::i32: // fall-through
case MVT::f32: ScaleFactor = 4 ; break ;
case MVT::i64: // fall-through
case MVT::f64: ScaleFactor = 8 ; break ;
}
// If this is a stack pointer and the offset needs to be simplified then put
bool ImmediateOffsetNeedsLowering = false ;
bool RegisterOffsetNeedsLowering = false ;
int64_t Offset = Addr.getOffset ();
if (((Offset < 0 ) || (Offset & (ScaleFactor - 1 ))) && !isInt<9 >(Offset))
ImmediateOffsetNeedsLowering = true ;
else if (Offset > 0 && !(Offset & (ScaleFactor - 1 )) &&
!isUInt<12 >(Offset / ScaleFactor))
ImmediateOffsetNeedsLowering = true ;
// Cannot encode an offset register and an immediate offset in the same
// instruction. Fold the immediate offset into the load/store instruction and
// emit an additional add to take care of the offset register.
if (!ImmediateOffsetNeedsLowering && Addr.getOffset () && Addr.isRegBase () &&
Addr.getOffsetReg ())
RegisterOffsetNeedsLowering = true ;
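// To illustrate the checks above for an i64 access (ScaleFactor == 8):
//   Offset = 16    -> multiple of 8 and 16/8 fits in 12 bits: scaled form, OK
//   Offset = 17    -> not a multiple of 8, but within [-256, 255]: unscaled, OK
//   Offset = -264  -> negative and below -256: ImmediateOffsetNeedsLowering
//   Offset = 32768 -> 32768/8 = 4096 does not fit in 12 unsigned bits: lowering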
// If this is a stack pointer and the offset needs to be simplified then put
// the alloca address into a register, set the base type back to register and
// continue. This should almost never happen.
if (needsLowering && Addr.getKind () == Address::FrameIndexBase ) {
if (ImmediateOffsetNeedsLowering && Addr.isFIBase () ) {
unsigned ResultReg = createResultReg (&AArch64::GPR64RegClass);
BuildMI (*FuncInfo.MBB , FuncInfo.InsertPt , DbgLoc, TII.get (AArch64::ADDXri),
ResultReg)
.addFrameIndex (Addr.getFI ())
.addImm (0 )
.addImm (0 );
.addFrameIndex (Addr.getFI ())
.addImm (0 )
.addImm (0 );
Addr.setKind (Address::RegBase);
Addr.setReg (ResultReg);
}
if (RegisterOffsetNeedsLowering) {
unsigned ResultReg = 0 ;
if (Addr.getReg ()) {
ResultReg = createResultReg (&AArch64::GPR64RegClass);
BuildMI (*FuncInfo.MBB , FuncInfo.InsertPt , DbgLoc,
TII.get (AArch64::ADDXrs), ResultReg)
.addReg (Addr.getReg ())
.addReg (Addr.getOffsetReg ())
.addImm (Addr.getShift ());
} else
ResultReg = Emit_LSL_ri (MVT::i64, Addr.getOffsetReg (),
/* Op0IsKill=*/ false , Addr.getShift ());
if (!ResultReg)
return false ;
Addr.setReg (ResultReg);
Addr.setOffsetReg (0 );
Addr.setShift (0 );
}
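// Example of the register-offset lowering above (registers illustrative): for
// Addr = { Reg=x1, OffsetReg=x2, Shift=3, Offset=8 } the base and shifted
// offset register are combined first, "add x3, x1, x2, lsl #3", and the
// load/store then uses x3 plus the remaining immediate offset 8.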
// Since the offset is too large for the load/store instruction get the
// reg+offset into a register.
if (needsLowering) {
uint64_t UnscaledOffset = Addr.getOffset () * ScaleFactor;
unsigned ResultReg = FastEmit_ri_ (MVT::i64, ISD::ADD, Addr.getReg (), false ,
UnscaledOffset, MVT::i64);
if (ResultReg == 0 )
if (ImmediateOffsetNeedsLowering) {
unsigned ResultReg = 0 ;
if (Addr.getReg ())
ResultReg = FastEmit_ri_ (MVT::i64, ISD::ADD, Addr.getReg (),
/* IsKill=*/ false , Offset, MVT::i64);
else
ResultReg = FastEmit_i (MVT::i64, MVT::i64, ISD::Constant, Offset);
if (!ResultReg)
return false ;
Addr.setReg (ResultReg);
Addr.setOffset (0 );
@@ -585,11 +695,11 @@ bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
void AArch64FastISel::AddLoadStoreOperands (Address &Addr,
const MachineInstrBuilder &MIB,
unsigned Flags,
MachineMemOperand *MMO ,
bool UseUnscaled ) {
int64_t Offset = Addr.getOffset ();
unsigned ScaleFactor ,
MachineMemOperand *MMO ) {
int64_t Offset = Addr.getOffset () / ScaleFactor ;
// Frame base works a bit differently. Handle it separately.
if (Addr.getKind () == Address::FrameIndexBase ) {
if (Addr.isFIBase () ) {
int FI = Addr.getFI ();
// FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
// and alignment should be based on the VT.
@@ -599,91 +709,97 @@ void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
// Now add the rest of the operands.
MIB.addFrameIndex (FI).addImm (Offset);
} else {
// Now add the rest of the operands.
MIB.addReg (Addr.getReg ());
MIB.addImm (Offset);
assert (Addr.isRegBase () && "Unexpected address kind.");
if (Addr.getOffsetReg ()) {
assert (Addr.getOffset () == 0 && "Unexpected offset");
bool IsSigned = Addr.getExtendType () == AArch64_AM::SXTW ||
Addr.getExtendType () == AArch64_AM::SXTX;
MIB.addReg (Addr.getReg ());
MIB.addReg (Addr.getOffsetReg ());
MIB.addImm (IsSigned);
MIB.addImm (Addr.getShift () != 0 );
} else {
MIB.addReg (Addr.getReg ());
MIB.addImm (Offset);
}
}
if (MMO)
MIB.addMemOperand (MMO);
}
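// Operand sketch for the register-offset path above: the ro-variant load/store
// instructions take (base, offset register, sign flag, do-shift flag), e.g.
//   LDRXroX x0, x1, x2, /*signed=*/0, /*shifted=*/1  ==>  ldr x0, [x1, x2, lsl #3]
// The final immediate only says whether the offset is scaled; the shift amount
// itself is implied by the access size. Registers are illustrative.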
bool AArch64FastISel::EmitLoad (MVT VT, unsigned &ResultReg, Address Addr,
MachineMemOperand *MMO, bool UseUnscaled) {
MachineMemOperand *MMO) {
// Simplify this down to something we can handle.
if (!SimplifyAddress (Addr, VT))
return false ;
unsigned ScaleFactor;
switch (VT.SimpleTy ) {
default : llvm_unreachable ("Unexpected value type.");
case MVT::i1: // fall-through
case MVT::i8: ScaleFactor = 1 ; break ;
case MVT::i16: ScaleFactor = 2 ; break ;
case MVT::i32: // fall-through
case MVT::f32: ScaleFactor = 4 ; break ;
case MVT::i64: // fall-through
case MVT::f64: ScaleFactor = 8 ; break ;
}
// Negative offsets require unscaled, 9-bit, signed immediate offsets.
// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
if (!UseUnscaled && Addr.getOffset () < 0 )
UseUnscaled = true ;
bool UseScaled = true ;
if ((Addr.getOffset () < 0 ) || (Addr.getOffset () & (ScaleFactor - 1 ))) {
UseScaled = false ;
ScaleFactor = 1 ;
}
static const unsigned OpcTable[4 ][6 ] = {
{ AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, AArch64::LDURXi,
AArch64::LDURSi, AArch64::LDURDi },
{ AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, AArch64::LDRXui,
AArch64::LDRSui, AArch64::LDRDui },
{ AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, AArch64::LDRXroX,
AArch64::LDRSroX, AArch64::LDRDroX },
{ AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, AArch64::LDRXroW,
AArch64::LDRSroW, AArch64::LDRDroW }
};
unsigned Opc;
const TargetRegisterClass *RC;
bool VTIsi1 = false ;
int64_t ScaleFactor = 0 ;
switch (VT.SimpleTy ) {
default :
return false ;
case MVT::i1:
VTIsi1 = true ;
// Intentional fall-through.
case MVT::i8:
Opc = UseUnscaled ? AArch64::LDURBBi : AArch64::LDRBBui;
RC = &AArch64::GPR32RegClass;
ScaleFactor = 1 ;
break ;
case MVT::i16:
Opc = UseUnscaled ? AArch64::LDURHHi : AArch64::LDRHHui;
RC = &AArch64::GPR32RegClass;
ScaleFactor = 2 ;
break ;
case MVT::i32:
Opc = UseUnscaled ? AArch64::LDURWi : AArch64::LDRWui;
RC = &AArch64::GPR32RegClass;
ScaleFactor = 4 ;
break ;
case MVT::i64:
Opc = UseUnscaled ? AArch64::LDURXi : AArch64::LDRXui;
RC = &AArch64::GPR64RegClass;
ScaleFactor = 8 ;
break ;
case MVT::f32:
Opc = UseUnscaled ? AArch64::LDURSi : AArch64::LDRSui;
RC = TLI.getRegClassFor (VT);
ScaleFactor = 4 ;
break ;
case MVT::f64:
Opc = UseUnscaled ? AArch64::LDURDi : AArch64::LDRDui;
RC = TLI.getRegClassFor (VT);
ScaleFactor = 8 ;
break ;
}
// Scale the offset.
if (!UseUnscaled) {
int64_t Offset = Addr.getOffset ();
if (Offset & (ScaleFactor - 1 ))
// Retry using an unscaled, 9-bit, signed immediate offset.
return EmitLoad (VT, ResultReg, Addr, MMO, /* UseUnscaled*/ true );
bool UseRegOffset = Addr.isRegBase () && !Addr.getOffset () && Addr.getReg () &&
Addr.getOffsetReg ();
unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0 ;
if (Addr.getExtendType () == AArch64_AM::UXTW ||
Addr.getExtendType () == AArch64_AM::SXTW)
Idx++;
Addr.setOffset (Offset / ScaleFactor);
switch (VT.SimpleTy ) {
default : llvm_unreachable ("Unexpected value type.");
case MVT::i1: VTIsi1 = true ; // Intentional fall-through.
case MVT::i8: Opc = OpcTable[Idx][0 ]; RC = &AArch64::GPR32RegClass; break ;
case MVT::i16: Opc = OpcTable[Idx][1 ]; RC = &AArch64::GPR32RegClass; break ;
case MVT::i32: Opc = OpcTable[Idx][2 ]; RC = &AArch64::GPR32RegClass; break ;
case MVT::i64: Opc = OpcTable[Idx][3 ]; RC = &AArch64::GPR64RegClass; break ;
case MVT::f32: Opc = OpcTable[Idx][4 ]; RC = &AArch64::FPR32RegClass; break ;
case MVT::f64: Opc = OpcTable[Idx][5 ]; RC = &AArch64::FPR64RegClass; break ;
}
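// Row selection above, summarised: row 0 = unscaled immediate (LDUR*), row 1 =
// scaled immediate (LDR*ui), row 2 = 64-bit register offset (LDR*roX), row 3 =
// 32-bit extended register offset (LDR*roW). For example, an i32 load through
// a sign-extended i32 index selects OpcTable[3][2] == AArch64::LDRWroW.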
// Simplify this down to something we can handle.
if (!SimplifyAddress (Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
return false ;
// Create the base instruction, then add the operands.
ResultReg = createResultReg (RC);
MachineInstrBuilder MIB = BuildMI (*FuncInfo.MBB , FuncInfo.InsertPt , DbgLoc,
TII.get (Opc), ResultReg);
AddLoadStoreOperands (Addr, MIB, MachineMemOperand::MOLoad, MMO, UseUnscaled );
AddLoadStoreOperands (Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO );
// Loading an i1 requires special handling.
if (VTIsi1) {
MRI.constrainRegClass (ResultReg, &AArch64::GPR32RegClass);
unsigned ANDReg = createResultReg (&AArch64::GPR32spRegClass);
BuildMI (*FuncInfo.MBB , FuncInfo.InsertPt , DbgLoc, TII.get (AArch64::ANDWri),
ANDReg)
.addReg (ResultReg)
.addImm (AArch64_AM::encodeLogicalImmediate (1 , 32 ));
.addReg (ResultReg)
.addImm (AArch64_AM::encodeLogicalImmediate (1 , 32 ));
ResultReg = ANDReg;
}
return true ;
@@ -699,7 +815,7 @@ bool AArch64FastISel::SelectLoad(const Instruction *I) {
// See if we can handle this address.
Address Addr;
if (!ComputeAddress (I->getOperand (0 ), Addr))
if (!ComputeAddress (I->getOperand (0 ), Addr, I->getType ()))
return false ;
unsigned ResultReg;
@@ -711,74 +827,79 @@ bool AArch64FastISel::SelectLoad(const Instruction *I) {
}
bool AArch64FastISel::EmitStore (MVT VT, unsigned SrcReg, Address Addr,
MachineMemOperand *MMO, bool UseUnscaled) {
// Negative offsets require unscaled, 9-bit, signed immediate offsets.
// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
if (!UseUnscaled && Addr.getOffset () < 0 )
UseUnscaled = true ;
MachineMemOperand *MMO) {
// Simplify this down to something we can handle.
if (!SimplifyAddress (Addr, VT))
return false ;
unsigned StrOpc;
bool VTIsi1 = false ;
int64_t ScaleFactor = 0 ;
// Using scaled, 12-bit, unsigned immediate offsets.
unsigned ScaleFactor;
switch (VT.SimpleTy ) {
default :
return false ;
case MVT::i1:
VTIsi1 = true ;
case MVT::i8:
StrOpc = UseUnscaled ? AArch64::STURBBi : AArch64::STRBBui;
ScaleFactor = 1 ;
break ;
case MVT::i16:
StrOpc = UseUnscaled ? AArch64::STURHHi : AArch64::STRHHui;
ScaleFactor = 2 ;
break ;
case MVT::i32:
StrOpc = UseUnscaled ? AArch64::STURWi : AArch64::STRWui;
ScaleFactor = 4 ;
break ;
case MVT::i64:
StrOpc = UseUnscaled ? AArch64::STURXi : AArch64::STRXui;
ScaleFactor = 8 ;
break ;
case MVT::f32:
StrOpc = UseUnscaled ? AArch64::STURSi : AArch64::STRSui;
ScaleFactor = 4 ;
break ;
case MVT::f64:
StrOpc = UseUnscaled ? AArch64::STURDi : AArch64::STRDui;
ScaleFactor = 8 ;
break ;
default : llvm_unreachable ("Unexpected value type.");
case MVT::i1: // fall-through
case MVT::i8: ScaleFactor = 1 ; break ;
case MVT::i16: ScaleFactor = 2 ; break ;
case MVT::i32: // fall-through
case MVT::f32: ScaleFactor = 4 ; break ;
case MVT::i64: // fall-through
case MVT::f64: ScaleFactor = 8 ; break ;
}
// Scale the offset.
if (!UseUnscaled) {
int64_t Offset = Addr.getOffset ();
if (Offset & (ScaleFactor - 1 ))
// Retry using an unscaled, 9-bit, signed immediate offset.
return EmitStore (VT, SrcReg, Addr, MMO, /* UseUnscaled*/ true );
Addr.setOffset (Offset / ScaleFactor);
// Negative offsets require unscaled, 9-bit, signed immediate offsets.
// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
bool UseScaled = true ;
if ((Addr.getOffset () < 0 ) || (Addr.getOffset () & (ScaleFactor - 1 ))) {
UseScaled = false ;
ScaleFactor = 1 ;
}
// Simplify this down to something we can handle.
if (!SimplifyAddress (Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
return false ;
static const unsigned OpcTable[4 ][6 ] = {
{ AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
AArch64::STURSi, AArch64::STURDi },
{ AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
AArch64::STRSui, AArch64::STRDui },
{ AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
AArch64::STRSroX, AArch64::STRDroX },
{ AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
AArch64::STRSroW, AArch64::STRDroW }
};
unsigned Opc;
bool VTIsi1 = false ;
bool UseRegOffset = Addr.isRegBase () && !Addr.getOffset () && Addr.getReg () &&
Addr.getOffsetReg ();
unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0 ;
if (Addr.getExtendType () == AArch64_AM::UXTW ||
Addr.getExtendType () == AArch64_AM::SXTW)
Idx++;
switch (VT.SimpleTy ) {
default : llvm_unreachable ("Unexpected value type.");
case MVT::i1: VTIsi1 = true ;
case MVT::i8: Opc = OpcTable[Idx][0 ]; break ;
case MVT::i16: Opc = OpcTable[Idx][1 ]; break ;
case MVT::i32: Opc = OpcTable[Idx][2 ]; break ;
case MVT::i64: Opc = OpcTable[Idx][3 ]; break ;
case MVT::f32: Opc = OpcTable[Idx][4 ]; break ;
case MVT::f64: Opc = OpcTable[Idx][5 ]; break ;
}
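// The store table mirrors the load table: e.g. an f64 store with a shifted
// 64-bit offset register selects OpcTable[2][5] == AArch64::STRDroX, i.e.
// "str d0, [x_base, x_idx, lsl #3]" (registers illustrative).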
// Storing an i1 requires special handling.
if (VTIsi1) {
MRI.constrainRegClass (SrcReg, &AArch64::GPR32RegClass);
unsigned ANDReg = createResultReg (&AArch64::GPR32spRegClass);
BuildMI (*FuncInfo.MBB , FuncInfo.InsertPt , DbgLoc, TII.get (AArch64::ANDWri),
ANDReg)
.addReg (SrcReg)
.addImm (AArch64_AM::encodeLogicalImmediate (1 , 32 ));
.addReg (SrcReg)
.addImm (AArch64_AM::encodeLogicalImmediate (1 , 32 ));
SrcReg = ANDReg;
}
// Create the base instruction, then add the operands.
MachineInstrBuilder MIB = BuildMI (*FuncInfo.MBB , FuncInfo.InsertPt , DbgLoc,
TII.get (StrOpc)).addReg (SrcReg);
AddLoadStoreOperands (Addr, MIB, MachineMemOperand::MOStore, MMO, UseUnscaled);
TII.get (Opc))
.addReg (SrcReg);
AddLoadStoreOperands (Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
return true ;
}
@@ -800,7 +921,7 @@ bool AArch64FastISel::SelectStore(const Instruction *I) {
// See if we can handle this address.
Address Addr;
if (!ComputeAddress (I->getOperand (1 ), Addr))
if (!ComputeAddress (I->getOperand (1 ), Addr, I->getOperand (0 )->getType ()))
return false ;
if (!EmitStore (VT, SrcReg, Addr, createMachineMemOperandFor (I)))