Skip to content

Commit

Permalink
[X86][SSE] Shuffle mask decode support for zero extend, scalar float/…
Browse files Browse the repository at this point in the history
…double moves and integer load instructions

This patch adds shuffle mask decodes for integer zero extends (pmovzx** and movq xmm,xmm) and scalar float/double loads/moves (movss/movsd).

Also adds shuffle mask decodes for integer loads (movd/movq).

Differential Revision: http://reviews.llvm.org/D7228

llvm-svn: 227688
  • Loading branch information
RKSimon committed Jan 31, 2015
1 parent aab5ec0 commit 9c76b47
Show file tree
Hide file tree
Showing 10 changed files with 834 additions and 627 deletions.
172 changes: 172 additions & 0 deletions llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
Expand Up @@ -21,6 +21,92 @@

using namespace llvm;

/// \brief Determines the source and destination vector types of a zero
/// extension (PMOVZX*) instruction from its opcode.
/// \note The element count of DstVT is correct, but SrcVT is always widened
/// to fill the whole src xmm register, so its upper elements may not be
/// included in the dst xmm/ymm register.
static void getZeroExtensionTypes(const MCInst *MI, MVT &SrcVT, MVT &DstVT) {
  switch (MI->getOpcode()) {
  // Byte sources: i8 -> i16 / i32 / i64.
  case X86::PMOVZXBWrr:
  case X86::PMOVZXBWrm:
  case X86::VPMOVZXBWrr:
  case X86::VPMOVZXBWrm:
    DstVT = MVT::v8i16;
    SrcVT = MVT::v16i8;
    return;
  case X86::VPMOVZXBWYrr:
  case X86::VPMOVZXBWYrm:
    DstVT = MVT::v16i16;
    SrcVT = MVT::v16i8;
    return;
  case X86::PMOVZXBDrr:
  case X86::PMOVZXBDrm:
  case X86::VPMOVZXBDrr:
  case X86::VPMOVZXBDrm:
    DstVT = MVT::v4i32;
    SrcVT = MVT::v16i8;
    return;
  case X86::VPMOVZXBDYrr:
  case X86::VPMOVZXBDYrm:
    DstVT = MVT::v8i32;
    SrcVT = MVT::v16i8;
    return;
  case X86::PMOVZXBQrr:
  case X86::PMOVZXBQrm:
  case X86::VPMOVZXBQrr:
  case X86::VPMOVZXBQrm:
    DstVT = MVT::v2i64;
    SrcVT = MVT::v16i8;
    return;
  case X86::VPMOVZXBQYrr:
  case X86::VPMOVZXBQYrm:
    DstVT = MVT::v4i64;
    SrcVT = MVT::v16i8;
    return;

  // Word sources: i16 -> i32 / i64.
  case X86::PMOVZXWDrr:
  case X86::PMOVZXWDrm:
  case X86::VPMOVZXWDrr:
  case X86::VPMOVZXWDrm:
    DstVT = MVT::v4i32;
    SrcVT = MVT::v8i16;
    return;
  case X86::VPMOVZXWDYrr:
  case X86::VPMOVZXWDYrm:
    DstVT = MVT::v8i32;
    SrcVT = MVT::v8i16;
    return;
  case X86::PMOVZXWQrr:
  case X86::PMOVZXWQrm:
  case X86::VPMOVZXWQrr:
  case X86::VPMOVZXWQrm:
    DstVT = MVT::v2i64;
    SrcVT = MVT::v8i16;
    return;
  case X86::VPMOVZXWQYrr:
  case X86::VPMOVZXWQYrm:
    DstVT = MVT::v4i64;
    SrcVT = MVT::v8i16;
    return;

  // Doubleword sources: i32 -> i64.
  case X86::PMOVZXDQrr:
  case X86::PMOVZXDQrm:
  case X86::VPMOVZXDQrr:
  case X86::VPMOVZXDQrm:
    DstVT = MVT::v2i64;
    SrcVT = MVT::v4i32;
    return;
  case X86::VPMOVZXDQYrr:
  case X86::VPMOVZXDQYrm:
    DstVT = MVT::v4i64;
    SrcVT = MVT::v4i32;
    return;

  // Any other opcode is a caller error.
  default:
    llvm_unreachable("Unknown zero extension instruction");
  }
}

//===----------------------------------------------------------------------===//
// Top Level Entrypoint
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -750,6 +836,92 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;

case X86::MOVSDrr:
case X86::VMOVSDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::MOVSDrm:
case X86::VMOVSDrm:
DecodeScalarMoveMask(MVT::v2f64, nullptr == Src2Name, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::MOVSSrr:
case X86::VMOVSSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::MOVSSrm:
case X86::VMOVSSrm:
DecodeScalarMoveMask(MVT::v4f32, nullptr == Src2Name, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;

case X86::MOVPQI2QIrr:
case X86::MOVZPQILo2PQIrr:
case X86::VMOVPQI2QIrr:
case X86::VMOVZPQILo2PQIrr:
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::MOVQI2PQIrm:
case X86::MOVZQI2PQIrm:
case X86::MOVZPQILo2PQIrm:
case X86::VMOVQI2PQIrm:
case X86::VMOVZQI2PQIrm:
case X86::VMOVZPQILo2PQIrm:
DecodeZeroMoveLowMask(MVT::v2i64, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::MOVDI2PDIrm:
case X86::VMOVDI2PDIrm:
DecodeZeroMoveLowMask(MVT::v4i32, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;

case X86::PMOVZXBWrr:
case X86::PMOVZXBDrr:
case X86::PMOVZXBQrr:
case X86::PMOVZXWDrr:
case X86::PMOVZXWQrr:
case X86::PMOVZXDQrr:
case X86::VPMOVZXBWrr:
case X86::VPMOVZXBDrr:
case X86::VPMOVZXBQrr:
case X86::VPMOVZXWDrr:
case X86::VPMOVZXWQrr:
case X86::VPMOVZXDQrr:
case X86::VPMOVZXBWYrr:
case X86::VPMOVZXBDYrr:
case X86::VPMOVZXBQYrr:
case X86::VPMOVZXWDYrr:
case X86::VPMOVZXWQYrr:
case X86::VPMOVZXDQYrr:
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::PMOVZXBWrm:
case X86::PMOVZXBDrm:
case X86::PMOVZXBQrm:
case X86::PMOVZXWDrm:
case X86::PMOVZXWQrm:
case X86::PMOVZXDQrm:
case X86::VPMOVZXBWrm:
case X86::VPMOVZXBDrm:
case X86::VPMOVZXBQrm:
case X86::VPMOVZXWDrm:
case X86::VPMOVZXWQrm:
case X86::VPMOVZXDQrm:
case X86::VPMOVZXBWYrm:
case X86::VPMOVZXBDYrm:
case X86::VPMOVZXBQYrm:
case X86::VPMOVZXWDYrm:
case X86::VPMOVZXWQYrm:
case X86::VPMOVZXDQYrm: {
MVT SrcVT, DstVT;
getZeroExtensionTypes(MI, SrcVT, DstVT);
DecodeZeroExtendMask(SrcVT, DstVT, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
} break;
}

// The only comments we decode are shuffles, so give up if we were unable to
Expand Down

0 comments on commit 9c76b47

Please sign in to comment.