Skip to content

Commit

Permalink
AVX-512: implement kunpck intrinsics.
Browse files Browse the repository at this point in the history
Differential Revision: http://reviews.llvm.org/D14821

llvm-svn: 254908
  • Loading branch information
Igor Breger committed Dec 7, 2015
1 parent a1240d8 commit 3ab6f17
Show file tree
Hide file tree
Showing 6 changed files with 194 additions and 74 deletions.
6 changes: 6 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsX86.td
Expand Up @@ -4026,6 +4026,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_kunpck_bw : GCCBuiltin<"__builtin_ia32_kunpckhi">,
Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_kunpck_wd : GCCBuiltin<"__builtin_ia32_kunpcksi">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_kunpck_dq : GCCBuiltin<"__builtin_ia32_kunpckdi">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
[IntrNoMem]>;
def int_x86_avx512_kortestz_w : GCCBuiltin<"__builtin_ia32_kortestzhi">,
Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty],
[IntrNoMem]>;
Expand Down
50 changes: 35 additions & 15 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -15998,19 +15998,26 @@ static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
}

if (Mask.getSimpleValueType() == MVT::i64 && Subtarget->is32Bit()) {
assert(MaskVT == MVT::v64i1 && "Unexpected mask VT!");
assert(Subtarget->hasBWI() && "Expected AVX512BW target!");
// In case 32bit mode, bitcast i64 is illegal, extend/split it.
SDValue Lo, Hi;
Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask,
DAG.getConstant(0, dl, MVT::i32));
Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask,
DAG.getConstant(1, dl, MVT::i32));

Lo = DAG.getNode(ISD::BITCAST, dl, MVT::v32i1, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, MVT::v32i1, Hi);

return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Hi, Lo);
if (MaskVT == MVT::v64i1) {
assert(Subtarget->hasBWI() && "Expected AVX512BW target!");
// In 32-bit mode a bitcast of i64 is illegal; extract and split it instead.
SDValue Lo, Hi;
Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask,
DAG.getConstant(0, dl, MVT::i32));
Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask,
DAG.getConstant(1, dl, MVT::i32));

Lo = DAG.getBitcast(MVT::v32i1, Lo);
Hi = DAG.getBitcast(MVT::v32i1, Hi);

return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi);
} else {
// MaskVT requires fewer than 64 bits. Truncate the mask (this should
// succeed in any case), then bitcast to MaskVT.
MVT TruncVT = MVT::getIntegerVT(MaskVT.getSizeInBits());
return DAG.getBitcast(MaskVT,
DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Mask));
}

} else {
MVT BitcastVT = MVT::getVectorVT(MVT::i1,
Expand Down Expand Up @@ -16600,6 +16607,18 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1),
Op.getOperand(2));
}
case KUNPCK: {
MVT VT = Op.getSimpleValueType();
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits()/2);

SDValue Src1 = getMaskNode(Op.getOperand(1), MaskVT, Subtarget, DAG, dl);
SDValue Src2 = getMaskNode(Op.getOperand(2), MaskVT, Subtarget, DAG, dl);
// Arguments should be swapped.
SDValue Res = DAG.getNode(IntrData->Opc0, dl,
MVT::getVectorVT(MVT::i1, VT.getSizeInBits()),
Src2, Src1);
return DAG.getBitcast(VT, Res);
}
default:
break;
}
Expand Down Expand Up @@ -20001,8 +20020,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
}
case ISD::INTRINSIC_WO_CHAIN: {
Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), Subtarget, DAG));
return;
if (SDValue V = LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), Subtarget, DAG))
Results.push_back(V);
return;
}
case ISD::READCYCLECOUNTER: {
return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
Expand Down
13 changes: 3 additions & 10 deletions llvm/lib/Target/X86/X86InstrAVX512.td
Expand Up @@ -2396,16 +2396,6 @@ defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, HasAVX512>, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, HasBWI>, PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, HasBWI>, PS, VEX_W;

multiclass avx512_mask_unpck_int<string IntName, string InstName> {
let Predicates = [HasAVX512] in
def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_bw")
(i16 GR16:$src1), (i16 GR16:$src2)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstName##"BWrr")
(v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
(v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>;
}
defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">;

// Mask bit testing
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
SDNode OpNode, Predicate prd> {
Expand Down Expand Up @@ -2496,6 +2486,9 @@ def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
(v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;

def : Pat<(v16i1 (extract_subvector (v32i1 VK32:$src), (iPTR 0))),
(v16i1 (COPY_TO_REGCLASS VK32:$src, VK16))>;

def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 0))),
(v32i1 (COPY_TO_REGCLASS VK64:$src, VK32))>;

Expand Down
8 changes: 5 additions & 3 deletions llvm/lib/Target/X86/X86IntrinsicsInfo.h
Expand Up @@ -30,7 +30,7 @@ enum IntrinsicType {
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
EXPAND_FROM_MEM, BLEND, INSERT_SUBVEC,
TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM
TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK
};

struct IntrinsicData {
Expand Down Expand Up @@ -341,7 +341,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),

X86_INTRINSIC_DATA(avx512_kunpck_bw, KUNPCK, ISD::CONCAT_VECTORS, 0),
X86_INTRINSIC_DATA(avx512_kunpck_dq, KUNPCK, ISD::CONCAT_VECTORS, 0),
X86_INTRINSIC_DATA(avx512_kunpck_wd, KUNPCK, ISD::CONCAT_VECTORS, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_128, FMA_OP_MASK3, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_256, FMA_OP_MASK3, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_512, FMA_OP_MASK3, X86ISD::FMADD,
Expand Down Expand Up @@ -1827,7 +1829,7 @@ static void verifyIntrinsicTables() {
"Intrinsic data tables should have unique entries");
}

// X86 specific compare constants.
// X86 specific compare constants.
// They must be kept in synch with avxintrin.h
#define _X86_CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
#define _X86_CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */
Expand Down
141 changes: 95 additions & 46 deletions llvm/test/CodeGen/X86/avx512-intrinsics.ll
Expand Up @@ -65,9 +65,9 @@ declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
define i16 @unpckbw_test(i16 %a0, i16 %a1) {
; CHECK-LABEL: unpckbw_test:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k0
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: kunpckbw %k0, %k1, %k0
; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: kunpckbw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
Expand Down Expand Up @@ -6160,76 +6160,103 @@ define <8 x double>@test_int_x86_avx512_mask_movddup_512(<8 x double> %x0, <8 x
}

define i32 @test_x86_avx512_comi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_comi_sd_eq_sae
; CHECK: vcomisd {sae}, %xmm1, %xmm0
; CHECK-NEXT: sete %al
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 8)
; CHECK-LABEL: test_x86_avx512_comi_sd_eq_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomisd {sae}, %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 8)
ret i32 %res
}

define i32 @test_x86_avx512_ucomi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq_sae
; CHECK: vucomisd {sae}, %xmm1, %xmm0
; CHECK-NEXT: sete %al
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 8)
; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomisd {sae}, %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 8)
ret i32 %res
}

define i32 @test_x86_avx512_comi_sd_eq(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_comi_sd_eq
; CHECK: vcomisd %xmm1, %xmm0
; CHECK-NEXT: sete %al
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 4)
; CHECK-LABEL: test_x86_avx512_comi_sd_eq:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 4)
ret i32 %res
}

define i32 @test_x86_avx512_ucomi_sd_eq(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq
; CHECK: vucomisd %xmm1, %xmm0
; CHECK-NEXT: sete %al
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 4)
; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 4)
ret i32 %res
}

define i32 @test_x86_avx512_comi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_comi_sd_lt_sae
; CHECK: vcomisd {sae}, %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 8)
; CHECK-LABEL: test_x86_avx512_comi_sd_lt_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomisd {sae}, %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 8)
ret i32 %res
}

define i32 @test_x86_avx512_ucomi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt_sae
; CHECK: vucomisd {sae}, %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 8)
; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomisd {sae}, %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 8)
ret i32 %res
}

define i32 @test_x86_avx512_comi_sd_lt(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_comi_sd_lt
; CHECK: vcomisd %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 4)
; CHECK-LABEL: test_x86_avx512_comi_sd_lt:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 4)
ret i32 %res
}

define i32 @test_x86_avx512_ucomi_sd_lt(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt
; CHECK: vucomisd %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 4)
; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 4)
ret i32 %res
}

declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32)
declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32)

define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx512_ucomi_ss_lt
; CHECK: vucomiss %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
%res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float> %a1, i32 9, i32 4)
; CHECK-LABEL: test_x86_avx512_ucomi_ss_lt:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float> %a1, i32 9, i32 4)
ret i32 %res
}

Expand All @@ -6238,43 +6265,65 @@ declare <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float>, <4 x float>, <4 x

define <4 x float>@test_int_x86_avx512_mask_move_ss_rrk(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rrk:
; CHECK: vmovss %xmm1, %xmm0, %xmm2 {%k1}
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
ret <4 x float> %res
}

define <4 x float>@test_int_x86_avx512_mask_move_ss_rrkz(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rrkz:
; CHECK: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 %x2)
ret <4 x float> %res
}

define <4 x float>@test_int_x86_avx512_mask_move_ss_rr(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rr:
; CHECK: vmovss %xmm1, %xmm0, %xmm0
; CHECK: ## BB#0:
; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 -1)
ret <4 x float> %res
}

declare <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double>, <2 x double>, <2 x double>, i8)
define <2 x double>@test_int_x86_avx512_mask_move_sd_rr(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rr:
; CHECK: vmovsd %xmm1, %xmm0, %xmm0
; CHECK: ## BB#0:
; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> zeroinitializer, i8 -1)
ret <2 x double> %res
}

define <2 x double>@test_int_x86_avx512_mask_move_sd_rrkz(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rrkz:
; CHECK: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> zeroinitializer, i8 %x2)
ret <2 x double> %res
}

define <2 x double>@test_int_x86_avx512_mask_move_sd_rrk(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rrk:
; CHECK: vmovsd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
ret <2 x double> %res
}
Expand Down

0 comments on commit 3ab6f17

Please sign in to comment.