Skip to content

Commit

Permalink
[Power10] Implement custom codegen for the vec_replace_elt and vec_re…
Browse files Browse the repository at this point in the history
…place_unaligned builtins.

This patch implements custom codegen for the vec_replace_elt and
vec_replace_unaligned builtins.

These builtins map to the @llvm.ppc.altivec.vinsw and @llvm.ppc.altivec.vinsd
intrinsics depending on the arguments. The main motivation for doing custom
codegen for these intrinsics is that there are float and double versions of
the builtin. Normally, converting the float to an integer would be done via
fptoui in the IR. This is incorrect as fptoui truncates the value and we must
ensure the value is not truncated. Therefore, we provide custom codegen to utilize
bitcast instead as bitcasts do not truncate.

Differential Revision: https://reviews.llvm.org/D83500
  • Loading branch information
amy-kwan committed Sep 24, 2020
1 parent d1aa143 commit 6b136b1
Show file tree
Hide file tree
Showing 6 changed files with 290 additions and 0 deletions.
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/BuiltinsPPC.def
Expand Up @@ -409,6 +409,8 @@ BUILTIN(__builtin_altivec_vinshvlx, "V8UsV8UsUiV8Us", "")
BUILTIN(__builtin_altivec_vinshvrx, "V8UsV8UsUiV8Us", "")
BUILTIN(__builtin_altivec_vinswvlx, "V4UiV4UiUiV4Ui", "")
BUILTIN(__builtin_altivec_vinswvrx, "V4UiV4UiUiV4Ui", "")
// vec_replace_elt / vec_replace_unaligned. The "t" attribute requests custom
// type checking (per the attribute legend in Builtins.def), so the prototype
// string below does not validate the arguments; the PPC builtin checks in
// SemaChecking.cpp do that instead.
BUILTIN(__builtin_altivec_vec_replace_elt, "V4UiV4UiUiIi", "t")
BUILTIN(__builtin_altivec_vec_replace_unaligned, "V4UiV4UiUiIi", "t")

// P10 Vector Extract built-ins.
BUILTIN(__builtin_altivec_vextdubvlx, "V2ULLiV16UcV16UcUi", "")
Expand Down
57 changes: 57 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Expand Up @@ -14224,6 +14224,63 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
return Builder.CreateCall(F, {X, Undef});
}
case PPC::BI__builtin_altivec_vec_replace_elt:
case PPC::BI__builtin_altivec_vec_replace_unaligned: {
  // Custom codegen for vec_replace_elt and vec_replace_unaligned.
  //   Ops[0] - the vector being modified.
  //   Ops[1] - the 32- or 64-bit scalar to insert (integer or floating point).
  //   Ops[2] - a compile time constant position; it is emitted as the last
  //            operand of the vinsw or vinsd intrinsic.
  // Floating point operands are bitcast (not converted) to integers so that
  // their bit pattern is inserted unchanged.
  ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
  assert(ArgCI &&
         "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
  int64_t ConstArg = ArgCI->getSExtValue();
  const unsigned ArgWidth = Ops[1]->getType()->getPrimitiveSizeInBits();
  assert((ArgWidth == 32 || ArgWidth == 64) && "Invalid argument width");
  const bool Is32Bit = ArgWidth == 32;

  // The input to vec_replace_elt is an element index, not a byte index, so
  // scale it by the element size in bytes.
  // NOTE(review): the Sema check accepts 0..12 (32-bit) or 0..8 (64-bit) for
  // both builtins, so a vec_replace_elt index above 3 (or 1) scales past
  // those limits here - confirm whether the Sema range should be tightened.
  if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt)
    ConstArg *= ArgWidth / 8;

  llvm::Function *F = nullptr;
  if (Is32Bit) {
    // When the second argument is 32 bits, it can either be an integer or
    // a float. The vinsw intrinsic is used in this case.
    F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsw);
    // Fix the constant according to endianness.
    if (getTarget().isLittleEndian())
      ConstArg = 12 - ConstArg;
  } else {
    // When the second argument is 64 bits, it can either be a long long or
    // a double. The vinsd intrinsic is used in this case.
    F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsd);
    // Fix the constant for little endian.
    if (getTarget().isLittleEndian())
      ConstArg = 8 - ConstArg;
  }
  Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg);

  // Depending on ArgWidth, the input vector could be a float or a double.
  // If the input vector is a float type, bitcast the inputs to integers. Or,
  // if the input vector is a double, bitcast the inputs to 64-bit integers.
  if (!Ops[1]->getType()->isIntegerTy(ArgWidth)) {
    Ops[0] = Builder.CreateBitCast(
        Ops[0], Is32Bit ? llvm::FixedVectorType::get(Int32Ty, 4)
                        : llvm::FixedVectorType::get(Int64Ty, 2));
    Ops[1] = Builder.CreateBitCast(Ops[1], Is32Bit ? Int32Ty : Int64Ty);
  }

  // Emit the call to vinsw or vinsd.
  Value *Call = Builder.CreateCall(F, Ops);

  // Ops[1] is always an integer at this point (it either already was one or
  // was bitcast above), so vec_replace_elt simply yields the intrinsic's
  // integer vector result; no separate floating point path is reachable.
  if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt)
    return Call;
  // vec_replace_unaligned yields its result as a vector of 16 bytes.
  return Builder.CreateBitCast(Call, llvm::FixedVectorType::get(Int8Ty, 16));
}
case PPC::BI__builtin_altivec_vpopcntb:
case PPC::BI__builtin_altivec_vpopcnth:
case PPC::BI__builtin_altivec_vpopcntw:
Expand Down
8 changes: 8 additions & 0 deletions clang/lib/Headers/altivec.h
Expand Up @@ -17837,6 +17837,14 @@ vec_blendv(vector double __a, vector double __b,
return __builtin_vsx_xxblendvd(__a, __b, __c);
}

/* vec_replace_elt */

/* Plain alias for the builtin: no per-type overloads are declared here; the
   compiler type-checks the call and emits vinsw or vinsd itself. */
#define vec_replace_elt __builtin_altivec_vec_replace_elt

/* vec_replace_unaligned */

/* Plain alias for the builtin: no per-type overloads are declared here; the
   compiler type-checks the call and emits vinsw or vinsd itself. */
#define vec_replace_unaligned __builtin_altivec_vec_replace_unaligned

/* vec_splati */

#define vec_splati(__a) \
Expand Down
19 changes: 19 additions & 0 deletions clang/lib/Sema/SemaChecking.cpp
Expand Up @@ -2570,6 +2570,17 @@ static bool isValidBPFPreserveFieldInfoArg(Expr *Arg) {
dyn_cast<ArraySubscriptExpr>(Arg->IgnoreParens()));
}

/// Check that \p EltTy is exactly the element type of the vector type
/// \p VectorTy.
///
/// On a mismatch, emits err_typecheck_call_different_arg_types at the start
/// of \p Call (covering the call's source range) naming the vector's element
/// type and \p EltTy.
///
/// NOTE(review): castAs presumes \p VectorTy really is a vector type; nothing
/// visible in the callers verifies that first - confirm.
///
/// \returns true when the types match, false after diagnosing a mismatch.
static bool isEltOfVectorTy(ASTContext &Context, CallExpr *Call, Sema &S,
                            QualType VectorTy, QualType EltTy) {
  const QualType ElementOfVector =
      VectorTy->castAs<VectorType>()->getElementType();

  if (Context.hasSameType(ElementOfVector, EltTy))
    return true;

  S.Diag(Call->getBeginLoc(), diag::err_typecheck_call_different_arg_types)
      << Call->getSourceRange() << ElementOfVector << EltTy;
  return false;
}

static bool isValidBPFPreserveTypeInfoArg(Expr *Arg) {
QualType ArgType = Arg->getType();
if (ArgType->getAsPlaceholderType())
Expand Down Expand Up @@ -3222,6 +3233,14 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
return SemaVSXCheck(TheCall);
case PPC::BI__builtin_altivec_vgnb:
return SemaBuiltinConstantArgRange(TheCall, 1, 2, 7);
case PPC::BI__builtin_altivec_vec_replace_elt:
case PPC::BI__builtin_altivec_vec_replace_unaligned: {
  // Argument 0 is the vector, argument 1 the replacement value and
  // argument 2 the constant position. Returns true (error) if the position
  // is not a constant in range or if the value's type is not the vector's
  // element type.
  const QualType VectorArgTy = TheCall->getArg(0)->getType();
  const QualType ValueArgTy = TheCall->getArg(1)->getType();

  // A 32-bit value may be placed at positions 0..12, anything else at 0..8.
  // NOTE(review): the same range is applied to vec_replace_elt, whose third
  // argument codegen treats as an element index - confirm this is intended.
  const int MaxPosition = Context.getIntWidth(ValueArgTy) == 32 ? 12 : 8;
  if (SemaBuiltinConstantArgRange(TheCall, 2, 0, MaxPosition))
    return true;

  return !isEltOfVectorTy(Context, TheCall, *this, VectorArgTy, ValueArgTy);
}
case PPC::BI__builtin_vsx_xxeval:
return SemaBuiltinConstantArgRange(TheCall, 3, 0, 255);
case PPC::BI__builtin_altivec_vsldbi:
Expand Down
123 changes: 123 additions & 0 deletions clang/test/CodeGen/builtins-ppc-p10vector.c
Expand Up @@ -21,6 +21,9 @@ vector signed __int128 vsi128a, vsi128b;
vector unsigned __int128 vui128a, vui128b, vui128c;
vector float vfa, vfb;
vector double vda, vdb;
float fa;
double da;
signed int sia;
signed int *iap;
unsigned int uia, uib, *uiap;
signed char *cap;
Expand Down Expand Up @@ -1011,6 +1014,126 @@ vector double test_vec_blend_d(void) {
return vec_blendv(vda, vdb, vullc);
}

// vec_replace_elt, vector signed int, element 0: vinsw at byte offset 0 for
// the BE run and 12 for the LE run, with no extra cast on the result.
vector signed int test_vec_replace_elt_si(void) {
// CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 0
// CHECK-BE-NEXT: ret <4 x i32>
// CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 12
// CHECK-LE-NEXT: ret <4 x i32>
return vec_replace_elt(vsia, sia, 0);
}

// vec_replace_elt, vector unsigned int, element 1: vinsw at byte offset 4 for
// the BE run and 8 for the LE run.
vector unsigned int test_vec_replace_elt_ui(void) {
// CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 4
// CHECK-BE-NEXT: ret <4 x i32>
// CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 8
// CHECK-LE-NEXT: ret <4 x i32>
return vec_replace_elt(vuia, uia, 1);
}

// vec_replace_elt, vector float, element 2: the float is bitcast to i32 (no
// value conversion), inserted with vinsw at byte offset 8 (BE) or 4 (LE), and
// the result is bitcast back to <4 x float>.
vector float test_vec_replace_elt_f(void) {
// CHECK-BE: bitcast float %{{.+}} to i32
// CHECK-BE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 8
// CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <4 x float>
// CHECK-BE-NEXT: ret <4 x float>
// CHECK-LE: bitcast float %{{.+}} to i32
// CHECK-LE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 4
// CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <4 x float>
// CHECK-LE-NEXT: ret <4 x float>
return vec_replace_elt(vfa, fa, 2);
}

// vec_replace_elt, vector signed long long, element 0: vinsd at byte offset 0
// for the BE run and 8 for the LE run.
vector signed long long test_vec_replace_elt_sll(void) {
// CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
// CHECK-BE-NEXT: ret <2 x i64>
// CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
// CHECK-LE-NEXT: ret <2 x i64>
return vec_replace_elt(vslla, llb, 0);
}

// vec_replace_elt, vector unsigned long long, element 0: vinsd at byte offset
// 0 for the BE run and 8 for the LE run.
vector unsigned long long test_vec_replace_elt_ull(void) {
// CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
// CHECK-BE-NEXT: ret <2 x i64>
// CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
// CHECK-LE-NEXT: ret <2 x i64>
return vec_replace_elt(vulla, ulla, 0);
}

// vec_replace_elt, vector double, element 1: the double is bitcast to i64 (no
// value conversion), inserted with vinsd at byte offset 8 (BE) or 0 (LE), and
// the result is bitcast back to <2 x double>.
vector double test_vec_replace_elt_d(void) {
// CHECK-BE: bitcast double %{{.+}} to i64
// CHECK-BE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
// CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <2 x double>
// CHECK-BE-NEXT: ret <2 x double>
// CHECK-LE: bitcast double %{{.+}} to i64
// CHECK-LE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
// CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <2 x double>
// CHECK-LE-NEXT: ret <2 x double>
return vec_replace_elt(vda, da, 1);
}

// vec_replace_unaligned, vector signed int, byte position 6: vinsw at offset
// 6 for both runs (12 - 6 is also 6), result bitcast to <16 x i8>.
vector unsigned char test_vec_replace_unaligned_si(void) {
// CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 6
// CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
// CHECK-BE-NEXT: ret <16 x i8>
// CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 6
// CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
// CHECK-LE-NEXT: ret <16 x i8>
return vec_replace_unaligned(vsia, sia, 6);
}

// vec_replace_unaligned, vector unsigned int, byte position 8: vinsw at
// offset 8 (BE) or 4 (LE), result bitcast to <16 x i8>.
vector unsigned char test_vec_replace_unaligned_ui(void) {
// CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 8
// CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
// CHECK-BE-NEXT: ret <16 x i8>
// CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 4
// CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
// CHECK-LE-NEXT: ret <16 x i8>
return vec_replace_unaligned(vuia, uia, 8);
}

// vec_replace_unaligned, vector float, byte position 12: the float is bitcast
// to i32, inserted with vinsw at offset 12 (BE) or 0 (LE), and the result is
// bitcast to <16 x i8>.
vector unsigned char test_vec_replace_unaligned_f(void) {
// CHECK-BE: bitcast float %{{.+}} to i32
// CHECK-BE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 12
// CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
// CHECK-BE-NEXT: ret <16 x i8>
// CHECK-LE: bitcast float %{{.+}} to i32
// CHECK-LE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 0
// CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
// CHECK-LE-NEXT: ret <16 x i8>
return vec_replace_unaligned(vfa, fa, 12);
}

// vec_replace_unaligned, vector signed long long, byte position 6: vinsd at
// offset 6 (BE) or 2 (LE), result bitcast to <16 x i8>.
vector unsigned char test_vec_replace_unaligned_sll(void) {
// CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 6
// CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
// CHECK-BE-NEXT: ret <16 x i8>
// CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 2
// CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
// CHECK-LE-NEXT: ret <16 x i8>
return vec_replace_unaligned(vslla, llb, 6);
}

// vec_replace_unaligned, vector unsigned long long, byte position 7: vinsd at
// offset 7 (BE) or 1 (LE), result bitcast to <16 x i8>.
vector unsigned char test_vec_replace_unaligned_ull(void) {
// CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 7
// CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
// CHECK-BE-NEXT: ret <16 x i8>
// CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 1
// CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
// CHECK-LE-NEXT: ret <16 x i8>
return vec_replace_unaligned(vulla, ulla, 7);
}

// vec_replace_unaligned, vector double, byte position 8: the double is
// bitcast to i64, inserted with vinsd at offset 8 (BE) or 0 (LE), and the
// result is bitcast to <16 x i8>.
vector unsigned char test_vec_replace_unaligned_d(void) {
// CHECK-BE: bitcast double %{{.+}} to i64
// CHECK-BE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
// CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
// CHECK-BE-NEXT: ret <16 x i8>
// CHECK-LE: bitcast double %{{.+}} to i64
// CHECK-LE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
// CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
// CHECK-LE-NEXT: ret <16 x i8>
return vec_replace_unaligned(vda, da, 8);
}

vector unsigned char test_vec_insertl_uc(void) {
// CHECK-BE: @llvm.ppc.altivec.vinsblx(<16 x i8> %{{.+}}, i32 %{{.+}}, i32
// CHECK-BE-NEXT: ret <16 x i8>
Expand Down
81 changes: 81 additions & 0 deletions clang/test/CodeGen/builtins-ppc-vec-ins-error.c
@@ -0,0 +1,81 @@
// REQUIRES: powerpc-registered-target

// RUN: %clang_cc1 -target-feature +vsx -target-cpu pwr10 \
// RUN: -triple powerpc64le-unknown-unknown -fsyntax-only %s -verify
// RUN: %clang_cc1 -target-feature +vsx -target-cpu pwr10 \
// RUN: -triple powerpc64-unknown-unknown -fsyntax-only %s -verify

#include <altivec.h>

// Operands for the tests below: one vector and one matching scalar for each
// element type accepted by vec_replace_elt / vec_replace_unaligned.
vector signed int vsia;
vector unsigned int vuia;
vector signed long long vslla;
vector unsigned long long vulla;
vector float vfa;
vector double vda;
signed int sia;
unsigned int uia;
signed long long slla;
unsigned long long ulla;
float fa;
double da;

// 32-bit value: position 13 is past the accepted maximum of 12.
vector signed int test_vec_replace_elt_si(void) {
return vec_replace_elt(vsia, sia, 13); // expected-error {{argument value 13 is outside the valid range [0, 12]}}
}

// Signedness mismatch: unsigned int vector with a signed int value.
vector unsigned int test_vec_replace_elt_ui(void) {
return vec_replace_elt(vuia, sia, 1); // expected-error {{arguments are of different types ('unsigned int' vs 'int')}}
}

// float value (32 bits wide): position 20 is past the accepted maximum of 12.
vector float test_vec_replace_elt_f(void) {
return vec_replace_elt(vfa, fa, 20); // expected-error {{argument value 20 is outside the valid range [0, 12]}}
}

// Type mismatch: float vector with a double value.
vector float test_vec_replace_elt_f_2(void) {
return vec_replace_elt(vfa, da, 0); // expected-error {{arguments are of different types ('float' vs 'double')}}
}

// 64-bit value: position 9 is past the accepted maximum of 8.
vector signed long long test_vec_replace_elt_sll(void) {
return vec_replace_elt(vslla, slla, 9); // expected-error {{argument value 9 is outside the valid range [0, 8]}}
}

// Second argument is itself a vector rather than the first vector's element
// type.
vector unsigned long long test_vec_replace_elt_ull(void) {
return vec_replace_elt(vulla, vda, 0); // expected-error {{arguments are of different types ('unsigned long long' vs '__vector double' (vector of 2 'double' values))}}
}

// Third argument is a vector variable, not an integer constant.
vector unsigned long long test_vec_replace_elt_ull_2(void) {
return vec_replace_elt(vulla, vulla, vsia); // expected-error {{argument to '__builtin_altivec_vec_replace_elt' must be a constant integer}}
}

// double value (64 bits wide): position 33 is past the accepted maximum of 8.
vector double test_vec_replace_elt_d(void) {
return vec_replace_elt(vda, da, 33); // expected-error {{argument value 33 is outside the valid range [0, 8]}}
}

// Type mismatch: signed int vector with a double value.
vector unsigned char test_vec_replace_unaligned_si(void) {
return vec_replace_unaligned(vsia, da, 6); // expected-error {{arguments are of different types ('int' vs 'double')}}
}

// 32-bit value: byte position 14 is past the accepted maximum of 12.
vector unsigned char test_vec_replace_unaligned_ui(void) {
return vec_replace_unaligned(vuia, uia, 14); // expected-error {{argument value 14 is outside the valid range [0, 12]}}
}

// float value (32 bits wide): byte position 19 is past the accepted maximum
// of 12.
vector unsigned char test_vec_replace_unaligned_f(void) {
return vec_replace_unaligned(vfa, fa, 19); // expected-error {{argument value 19 is outside the valid range [0, 12]}}
}

// Type mismatch: signed long long vector with a float value.
vector unsigned char test_vec_replace_unaligned_sll(void) {
return vec_replace_unaligned(vslla, fa, 0); // expected-error {{arguments are of different types ('long long' vs 'float')}}
}

// 64-bit value: byte position 12 is past the accepted maximum of 8.
vector unsigned char test_vec_replace_unaligned_ull(void) {
return vec_replace_unaligned(vulla, ulla, 12); // expected-error {{argument value 12 is outside the valid range [0, 8]}}
}

// Type mismatch: double vector with a float value.
vector unsigned char test_vec_replace_unaligned_d(void) {
return vec_replace_unaligned(vda, fa, 8); // expected-error {{arguments are of different types ('double' vs 'float')}}
}

// Third argument is a double variable, not an integer constant.
vector unsigned char test_vec_replace_unaligned_d_2(void) {
return vec_replace_unaligned(vda, vda, da); // expected-error {{argument to '__builtin_altivec_vec_replace_unaligned' must be a constant integer}}
}

0 comments on commit 6b136b1

Please sign in to comment.