Skip to content

Commit 5e4ce1a

Browse files
Hao LiuHao Liu
authored andcommitted
Implement the newly added AArch64 ACLE functions for ld1/st1 with 2/3/4 vectors.
The functions are like: vst1_s8_x2 ... llvm-svn: 194991
1 parent 5a4e4e1 commit 5e4ce1a

File tree

5 files changed

+1248
-1
lines changed

5 files changed

+1248
-1
lines changed

clang/include/clang/Basic/arm_neon.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -543,6 +543,20 @@ def ST3 : WInst<"vst3", "vp3",
543543
def ST4 : WInst<"vst4", "vp4",
544544
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
545545

546+
def LD1_X2 : WInst<"vld1_x2", "2c",
547+
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
548+
def LD3_x3 : WInst<"vld1_x3", "3c",
549+
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
550+
def LD4_x4 : WInst<"vld1_x4", "4c",
551+
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
552+
553+
def ST1_X2 : WInst<"vst1_x2", "vp2",
554+
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
555+
def ST1_X3 : WInst<"vst1_x3", "vp3",
556+
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
557+
def ST1_X4 : WInst<"vst1_x4", "vp4",
558+
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">;
559+
546560
////////////////////////////////////////////////////////////////////////////////
547561
// Addition
548562
// With additional Qd type.

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2750,7 +2750,42 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
27502750
}
27512751

27522752
SmallVector<Value *, 4> Ops;
2753+
llvm::Value *Align = 0; // Alignment for load/store
27532754
for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
2755+
if (i == 0) {
2756+
switch (BuiltinID) {
2757+
case AArch64::BI__builtin_neon_vst1_x2_v:
2758+
case AArch64::BI__builtin_neon_vst1q_x2_v:
2759+
case AArch64::BI__builtin_neon_vst1_x3_v:
2760+
case AArch64::BI__builtin_neon_vst1q_x3_v:
2761+
case AArch64::BI__builtin_neon_vst1_x4_v:
2762+
case AArch64::BI__builtin_neon_vst1q_x4_v:
2763+
// Get the alignment for the argument in addition to the value;
2764+
// we'll use it later.
2765+
std::pair<llvm::Value *, unsigned> Src =
2766+
EmitPointerWithAlignment(E->getArg(0));
2767+
Ops.push_back(Src.first);
2768+
Align = Builder.getInt32(Src.second);
2769+
continue;
2770+
}
2771+
}
2772+
if (i == 1) {
2773+
switch (BuiltinID) {
2774+
case AArch64::BI__builtin_neon_vld1_x2_v:
2775+
case AArch64::BI__builtin_neon_vld1q_x2_v:
2776+
case AArch64::BI__builtin_neon_vld1_x3_v:
2777+
case AArch64::BI__builtin_neon_vld1q_x3_v:
2778+
case AArch64::BI__builtin_neon_vld1_x4_v:
2779+
case AArch64::BI__builtin_neon_vld1q_x4_v:
2780+
// Get the alignment for the argument in addition to the value;
2781+
// we'll use it later.
2782+
std::pair<llvm::Value *, unsigned> Src =
2783+
EmitPointerWithAlignment(E->getArg(1));
2784+
Ops.push_back(Src.first);
2785+
Align = Builder.getInt32(Src.second);
2786+
continue;
2787+
}
2788+
}
27542789
Ops.push_back(EmitScalarExpr(E->getArg(i)));
27552790
}
27562791

@@ -3084,6 +3119,57 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
30843119
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4_v, E);
30853120
case AArch64::BI__builtin_neon_vst4q_v:
30863121
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4q_v, E);
3122+
case AArch64::BI__builtin_neon_vld1_x2_v:
3123+
case AArch64::BI__builtin_neon_vld1q_x2_v:
3124+
case AArch64::BI__builtin_neon_vld1_x3_v:
3125+
case AArch64::BI__builtin_neon_vld1q_x3_v:
3126+
case AArch64::BI__builtin_neon_vld1_x4_v:
3127+
case AArch64::BI__builtin_neon_vld1q_x4_v: {
3128+
unsigned Int;
3129+
switch (BuiltinID) {
3130+
case AArch64::BI__builtin_neon_vld1_x2_v:
3131+
case AArch64::BI__builtin_neon_vld1q_x2_v:
3132+
Int = Intrinsic::aarch64_neon_vld1x2;
3133+
break;
3134+
case AArch64::BI__builtin_neon_vld1_x3_v:
3135+
case AArch64::BI__builtin_neon_vld1q_x3_v:
3136+
Int = Intrinsic::aarch64_neon_vld1x3;
3137+
break;
3138+
case AArch64::BI__builtin_neon_vld1_x4_v:
3139+
case AArch64::BI__builtin_neon_vld1q_x4_v:
3140+
Int = Intrinsic::aarch64_neon_vld1x4;
3141+
break;
3142+
}
3143+
Function *F = CGM.getIntrinsic(Int, Ty);
3144+
Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld1xN");
3145+
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3146+
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3147+
return Builder.CreateStore(Ops[1], Ops[0]);
3148+
}
3149+
case AArch64::BI__builtin_neon_vst1_x2_v:
3150+
case AArch64::BI__builtin_neon_vst1q_x2_v:
3151+
case AArch64::BI__builtin_neon_vst1_x3_v:
3152+
case AArch64::BI__builtin_neon_vst1q_x3_v:
3153+
case AArch64::BI__builtin_neon_vst1_x4_v:
3154+
case AArch64::BI__builtin_neon_vst1q_x4_v: {
3155+
Ops.push_back(Align);
3156+
unsigned Int;
3157+
switch (BuiltinID) {
3158+
case AArch64::BI__builtin_neon_vst1_x2_v:
3159+
case AArch64::BI__builtin_neon_vst1q_x2_v:
3160+
Int = Intrinsic::aarch64_neon_vst1x2;
3161+
break;
3162+
case AArch64::BI__builtin_neon_vst1_x3_v:
3163+
case AArch64::BI__builtin_neon_vst1q_x3_v:
3164+
Int = Intrinsic::aarch64_neon_vst1x3;
3165+
break;
3166+
case AArch64::BI__builtin_neon_vst1_x4_v:
3167+
case AArch64::BI__builtin_neon_vst1q_x4_v:
3168+
Int = Intrinsic::aarch64_neon_vst1x4;
3169+
break;
3170+
}
3171+
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "");
3172+
}
30873173

30883174
// Crypto
30893175
case AArch64::BI__builtin_neon_vaeseq_v:

0 commit comments

Comments
 (0)